From lldb-commits at lists.llvm.org Mon Oct 2 01:02:43 2023 From: lldb-commits at lists.llvm.org (Mariya Podchishchaeva via lldb-commits) Date: Mon, 02 Oct 2023 01:02:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [Clang] Fix crash when ill-formed code is treated as a deduction guide (PR #67373) In-Reply-To: Message-ID: <651a7923.170a0220.6c10f.0223@mx.google.com> https://github.com/Fznamznon approved this pull request. https://github.com/llvm/llvm-project/pull/67373 From lldb-commits at lists.llvm.org Mon Oct 2 07:23:22 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 07:23:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask` (PR #67723) In-Reply-To: Message-ID: <651ad25a.050a0220.f04c3.885a@mx.google.com> https://github.com/shraiysh updated https://github.com/llvm/llvm-project/pull/67723 >From 6aabc3c10ea2d587120b74966b7ce96f9b8167af Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Thu, 28 Sep 2023 13:35:07 -0500 Subject: [PATCH 1/2] [OpenMPIRBuilder] Remove wrapper function in `createTask` This patch removes the wrapper function in `OpenMPIRBuilder::createTask`. The outlined function is directly of the form that is expected by the runtime library calls. This also fixes the global thread ID argument, which should be used whenever `kmpc_global_thread_num()` is called inside the outlined function. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 106 ++++++++---------- .../Frontend/OpenMPIRBuilderTest.cpp | 56 +++++---- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 51 +++------ 3 files changed, 99 insertions(+), 114 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9c70d384e55db2b..54012b488c6b671 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -1496,6 +1497,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { + // We create a temporary i32 value that will represent the global tid after + // outlining. + SmallVector ToBeDeleted; + Builder.restoreIP(AllocaIP); + AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); + LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"); + ToBeDeleted.append({TID, TIDAddr}); + if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1523,41 +1532,27 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, BasicBlock *TaskAllocaBB = splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); + // Fake use of TID + Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); + BinaryOperator *AddInst = + dyn_cast(Builder.CreateAdd(TID, Builder.getInt32(10))); + ToBeDeleted.push_back(AddInst); + OutlineInfo OI; OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, - Dependencies](Function &OutlinedFn) { - // The input IR here looks like the following- - // ``` - // func @current_fn() { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... 
} - // ``` - // - // This is changed to the following- - // - // ``` - // func @current_fn() { - // runtime_call(..., wrapper_fn, ...) - // } - // func @wrapper_fn(..., %args) { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - - // The stale call instruction will be replaced with a new call instruction - // for runtime call with a wrapper function. + OI.ExcludeArgsFromAggregate = {TID}; + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) { + // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); // HasShareds is true if any variables are captured in the outlined region, // false otherwise. - bool HasShareds = StaleCI->arg_size() > 0; + bool HasShareds = StaleCI->arg_size() > 1; Builder.SetInsertPoint(StaleCI); // Gather the arguments for emitting the runtime call for @@ -1595,7 +1590,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Value *SharedsSize = Builder.getInt64(0); if (HasShareds) { AllocaInst *ArgStructAlloca = - dyn_cast(StaleCI->getArgOperand(0)); + dyn_cast(StaleCI->getArgOperand(1)); assert(ArgStructAlloca && "Unable to find the alloca instruction corresponding to arguments " "for extracted function"); @@ -1606,31 +1601,17 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, SharedsSize = Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); } - - // Argument - task_entry (the wrapper function) - // If the outlined function has some captured variables (i.e. HasShareds is - // true), then the wrapper function will have an additional argument (the - // struct containing captured variables). Otherwise, no such argument will - // be present. - SmallVector WrapperArgTys{Builder.getInt32Ty()}; - if (HasShareds) - WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType()); - FunctionCallee WrapperFuncVal = M.getOrInsertFunction( - (Twine(OutlinedFn.getName()) + ".wrapper").str(), - FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false)); - Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); - // Emit the @__kmpc_omp_task_alloc runtime call // The runtime call returns a pointer to an area where the task captured // variables must be copied before the task is run (TaskData) CallInst *TaskData = Builder.CreateCall( TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags, /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize, - /*task_func=*/WrapperFunc}); + /*task_func=*/&OutlinedFn}); // Copy the arguments for outlined function if (HasShareds) { - Value *Shareds = StaleCI->getArgOperand(0); + Value *Shareds = StaleCI->getArgOperand(1); Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData); Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment, @@ -1697,10 +1678,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, if (IfCondition) { // `SplitBlockAndInsertIfThenElse` requires the block to have a // terminator. 
- BasicBlock *NewBasicBlock = - splitBB(Builder, /*CreateBranch=*/true, "if.end"); + splitBB(Builder, /*CreateBranch=*/true, "if.end"); Instruction *IfTerminator = - NewBasicBlock->getSinglePredecessor()->getTerminator(); + Builder.GetInsertPoint()->getParent()->getTerminator(); Instruction *ThenTI = IfTerminator, *ElseTI = nullptr; Builder.SetInsertPoint(IfTerminator); SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI, @@ -1711,10 +1691,12 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Function *TaskCompleteFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0); Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData}); + CallInst *CI = nullptr; if (HasShareds) - Builder.CreateCall(WrapperFunc, {ThreadID, TaskData}); + CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData}); else - Builder.CreateCall(WrapperFunc, {ThreadID}); + CI = Builder.CreateCall(&OutlinedFn, {ThreadID}); + CI->setDebugLoc(StaleCI->getDebugLoc()); Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData}); Builder.SetInsertPoint(ThenTI); } @@ -1736,18 +1718,28 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, StaleCI->eraseFromParent(); - // Emit the body for wrapper function - BasicBlock *WrapperEntryBB = - BasicBlock::Create(M.getContext(), "", WrapperFunc); - Builder.SetInsertPoint(WrapperEntryBB); + Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); if (HasShareds) { - llvm::Value *Shareds = - Builder.CreateLoad(VoidPtr, WrapperFunc->getArg(1)); - Builder.CreateCall(&OutlinedFn, {Shareds}); - } else { - Builder.CreateCall(&OutlinedFn); + LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1)); + OutlinedFn.getArg(1)->replaceUsesWithIf( + Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); + } + + // Replace kmpc_global_thread_num() calls with the global thread id + // argument. 
+ OutlinedFn.getArg(0)->setName("global.tid"); + FunctionCallee TIDRTLFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); + for (Instruction &Inst : instructions(OutlinedFn)) { + CallInst *CI = dyn_cast(&Inst); + if (!CI) + continue; + if (CI->getCalledFunction() == TIDRTLFn.getCallee()) + CI->replaceAllUsesWith(OutlinedFn.getArg(0)); } - Builder.CreateRet(Builder.getInt32(0)); + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fd524f6067ee0ea..643b34270c01693 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5486,25 +5486,28 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { 24); // 64-bit pointer + 128-bit integer // Verify Wrapper function - Function *WrapperFunc = + Function *OutlinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); - ASSERT_NE(WrapperFunc, nullptr); + ASSERT_NE(OutlinedFn, nullptr); - LoadInst *SharedsLoad = dyn_cast(WrapperFunc->begin()->begin()); + LoadInst *SharedsLoad = dyn_cast(OutlinedFn->begin()->begin()); ASSERT_NE(SharedsLoad, nullptr); - EXPECT_EQ(SharedsLoad->getPointerOperand(), WrapperFunc->getArg(1)); - - EXPECT_FALSE(WrapperFunc->isDeclaration()); - CallInst *OutlinedFnCall = - dyn_cast(++WrapperFunc->begin()->begin()); - ASSERT_NE(OutlinedFnCall, nullptr); - EXPECT_EQ(WrapperFunc->getArg(0)->getType(), Builder.getInt32Ty()); - EXPECT_EQ(OutlinedFnCall->getArgOperand(0), - WrapperFunc->getArg(1)->uses().begin()->getUser()); + EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1)); + + EXPECT_FALSE(OutlinedFn->isDeclaration()); + EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty()); + + // Verify that the data argument is used only once, and that too in the load + // instruction that is then used for accessing shared data. + Value *DataPtr = OutlinedFn->getArg(1); + EXPECT_EQ(DataPtr->getNumUses(), 1); + EXPECT_TRUE(isa(DataPtr->uses().begin()->getUser())); + Value *Data = DataPtr->uses().begin()->getUser(); + EXPECT_TRUE(all_of(Data->uses(), [](Use &U) { + return isa(U.getUser()); + })); // Verify the presence of `trunc` and `icmp` instructions in Outlined function - Function *OutlinedFn = OutlinedFnCall->getCalledFunction(); - ASSERT_NE(OutlinedFn, nullptr); EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) { return isa(&inst); })); EXPECT_TRUE(any_of(instructions(OutlinedFn), @@ -5547,6 +5550,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { Builder.CreateRetVoid(); EXPECT_FALSE(verifyModule(*M, &errs())); + + // Check that the outlined function has only one argument. 
+ CallInst *TaskAllocCall = dyn_cast( + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) + ->user_back()); + Function *OutlinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)); + ASSERT_NE(OutlinedFn, nullptr); + ASSERT_EQ(OutlinedFn->arg_size(), 1); } TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { @@ -5658,8 +5669,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { F->setName("func"); IRBuilder<> Builder(BB); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; - IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); Value *Final = Builder.CreateICmp( CmpInst::Predicate::ICMP_EQ, F->getArg(0), @@ -5711,8 +5722,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { F->setName("func"); IRBuilder<> Builder(BB); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; - IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); Value *IfCondition = Builder.CreateICmp( CmpInst::Predicate::ICMP_EQ, F->getArg(0), @@ -5758,15 +5769,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { ->user_back()); ASSERT_NE(TaskBeginIfCall, nullptr); ASSERT_NE(TaskCompleteCall, nullptr); - Function *WrapperFunc = + Function *OulinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); - ASSERT_NE(WrapperFunc, nullptr); - CallInst *WrapperFuncCall = dyn_cast(WrapperFunc->user_back()); - ASSERT_NE(WrapperFuncCall, nullptr); + ASSERT_NE(OulinedFn, nullptr); + CallInst *OulinedFnCall = dyn_cast(OulinedFn->user_back()); + ASSERT_NE(OulinedFnCall, nullptr); EXPECT_EQ(TaskBeginIfCall->getParent(), IfConditionBranchInst->getSuccessor(1)); - EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), WrapperFuncCall); - EXPECT_EQ(WrapperFuncCall->getNextNonDebugInstruction(), TaskCompleteCall); + + EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); + EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); } TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 28b0113a19d61b8..2cd561cb021075f 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2209,7 +2209,7 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, - // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) omp.task { %n = llvm.mlir.constant(1 : i64) : i64 @@ -2222,7 +2222,7 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { llvm.return } -// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]]) // CHECK: task.alloca{{.*}}: // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: @@ -2236,12 +2236,6 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: [[exit_stub]]: // CHECK: ret void - -// 
CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { -// CHECK: call void @[[outlined_fn]]() -// CHECK: ret i32 0 -// CHECK: } - // ----- // CHECK-LABEL: define void @omp_task_with_deps @@ -2259,7 +2253,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, - // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]], {{.*}}) omp.task depend(taskdependin -> %zaddr : !llvm.ptr) { %n = llvm.mlir.constant(1 : i64) : i64 @@ -2272,7 +2266,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { llvm.return } -// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]]) // CHECK: task.alloca{{.*}}: // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: @@ -2286,11 +2280,6 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: [[exit_stub]]: // CHECK: ret void -// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { -// CHECK: call void @[[outlined_fn]]() -// CHECK: ret i32 0 -// CHECK: } - // ----- // CHECK-LABEL: define void @omp_task @@ -2304,7 +2293,7 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 16, - // CHECK-SAME: ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: ptr @[[outlined_fn:.+]]) // CHECK: %[[shareds:.+]] = load ptr, ptr %[[task_data]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.+}} %[[shareds]], ptr {{.+}}, i64 16, i1 false) // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) @@ -2321,8 +2310,9 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { } } -// CHECK: define internal void @[[outlined_fn:.+]](ptr %[[task_data:.+]]) +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]], ptr %[[task_data:.+]]) // CHECK: task.alloca{{.*}}: +// CHECK: %[[shareds:.+]] = load ptr, ptr %[[task_data]] // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: // CHECK: br label %[[task_region:[^, ]+]] @@ -2333,13 +2323,6 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // CHECK: [[exit_stub]]: // CHECK: ret void - -// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, ptr %[[task_data:.+]]) { -// CHECK: %[[shareds:.+]] = load ptr, ptr %1, align 8 -// CHECK: call void @[[outlined_fn]](ptr %[[shareds]]) -// CHECK: ret i32 0 -// CHECK: } - // ----- llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { @@ -2355,14 +2338,12 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { } // CHECK-LABEL: @par_task_ -// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @par_task_..omp_par.wrapper) +// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]]) -// CHECK-LABEL: define internal void @par_task_..omp_par +// CHECK: define internal void @[[task_outlined_fn]] // CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8 -// CHECK: call void ({{.*}}) 
@__kmpc_fork_call({{.*}}, ptr @par_task_..omp_par..omp_par, ptr %[[ARG_ALLOC]]) -// CHECK: define internal void @par_task_..omp_par..omp_par -// CHECK: define i32 @par_task_..omp_par.wrapper -// CHECK: call void @par_task_..omp_par +// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]]) +// CHECK: define internal void @[[parallel_outlined_fn]] // ----- llvm.func @foo() -> () @@ -2432,7 +2413,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 40, i64 0, ptr @omp_taskgroup_task..omp_par.wrapper) +// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 40, i64 0, ptr @[[outlined_task_fn:.+]]) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], ptr %[[t1_alloc]]) // CHECK: br label %[[task_exit:[^,]+]] // CHECK: [[task_exit]]: @@ -2445,7 +2426,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: %[[gep3:.+]] = getelementptr { i32, i32, ptr }, ptr %[[structArg]], i32 0, i32 2 // CHECK: store ptr %[[zaddr]], ptr %[[gep3]], align 8 // CHECK: %[[omp_global_thread_num_t2:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 40, i64 16, ptr @omp_taskgroup_task..omp_par.1.wrapper) +// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 40, i64 16, ptr @[[outlined_task_fn:.+]]) // CHECK: %[[shareds:.+]] = load ptr, ptr %[[t2_alloc]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[shareds]], ptr align 1 %[[structArg]], i64 16, i1 false) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], ptr %[[t2_alloc]]) @@ -2617,7 +2598,7 @@ llvm.func @omp_task_final(%boolexpr: i1) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) // CHECK: %[[final_flag:.+]] = select i1 %[[boolexpr]], i32 2, i32 0 // CHECK: %[[task_flags:.+]] = or i32 %[[final_flag]], 1 -// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 %[[task_flags]], i64 40, i64 0, ptr @omp_task_final..omp_par.wrapper) +// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 %[[task_flags]], i64 40, i64 0, ptr @[[task_outlined_fn:.+]]) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[task_exit:[^,]+]] // CHECK: [[task_exit]]: @@ -2648,14 +2629,14 @@ llvm.func @omp_task_if(%boolexpr: i1) { // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 0, ptr @omp_task_if..omp_par.wrapper) +// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 0, ptr @[[task_outlined_fn:.+]]) // CHECK: br i1 %[[boolexpr]], label 
%[[true_label:[^,]+]], label %[[false_label:[^,]+]] // CHECK: [[true_label]]: // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit:[^,]+]] // CHECK: [[false_label:[^,]+]]: ; preds = %codeRepl // CHECK: call void @__kmpc_omp_task_begin_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) -// CHECK: %{{.+}} = call i32 @omp_task_if..omp_par.wrapper(i32 %[[omp_global_thread_num]]) +// CHECK: call void @[[task_outlined_fn]](i32 %[[omp_global_thread_num]]) // CHECK: call void @__kmpc_omp_task_complete_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit]] // CHECK: [[if_else_exit]]: >From a1a9438b5e00170030b419a7736053422745cbc6 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:22:30 -0500 Subject: [PATCH 2/2] Remove outlining for teams too. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 178 +++++++++--------- .../Frontend/OpenMPIRBuilderTest.cpp | 22 +-- 2 files changed, 95 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 54012b488c6b671..a5a73bcc10c48e3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -341,6 +341,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions. +Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { + FakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); + ToBeDeleted.push(FakeVal); + } + + // We only need TIDAddr and ZeroAddr for modeling purposes to get the + // associated arguments in the outlined function, so we delete them later. + + // Fake use of TID + Builder.restoreIP(InnerAllocaIP); + Instruction *UseFakeVal; + if (AsPtr) + UseFakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use"); + else + UseFakeVal = + cast(Builder.CreateAdd(FakeVal, Builder.getInt32(10))); + ToBeDeleted.push(UseFakeVal); + return FakeVal; +} + //===----------------------------------------------------------------------===// // OpenMPIRBuilderConfig //===----------------------------------------------------------------------===// @@ -1497,13 +1535,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { - // We create a temporary i32 value that will represent the global tid after - // outlining. 
- SmallVector ToBeDeleted; - Builder.restoreIP(AllocaIP); - AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); - LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"); - ToBeDeleted.append({TID, TIDAddr}); if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1532,19 +1563,24 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, BasicBlock *TaskAllocaBB = splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); - // Fake use of TID - Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); - BinaryOperator *AddInst = - dyn_cast(Builder.CreateAdd(TID, Builder.getInt32(10))); - ToBeDeleted.push_back(AddInst); + InsertPointTy TaskAllocaIP = + InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); + InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); + BodyGenCB(TaskAllocaIP, TaskBodyIP); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); OutlineInfo OI; OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.ExcludeArgsFromAggregate = {TID}; + + // Add the thread ID argument. + std::stack ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, - TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) { + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable { // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); @@ -1670,7 +1706,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, // br label %exit // else: // call @__kmpc_omp_task_begin_if0(...) - // call @wrapper_fn(...) + // call @outlined_fn(...) // call @__kmpc_omp_task_complete_if0(...) // br label %exit // exit: @@ -1725,31 +1761,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); } - // Replace kmpc_global_thread_num() calls with the global thread id - // argument. - OutlinedFn.getArg(0)->setName("global.tid"); - FunctionCallee TIDRTLFn = - getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); - for (Instruction &Inst : instructions(OutlinedFn)) { - CallInst *CI = dyn_cast(&Inst); - if (!CI) - continue; - if (CI->getCalledFunction() == TIDRTLFn.getCallee()) - CI->replaceAllUsesWith(OutlinedFn.getArg(0)); + while (!ToBeDeleted.empty()) { + ToBeDeleted.top()->eraseFromParent(); + ToBeDeleted.pop(); } - - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); - InsertPointTy TaskAllocaIP = - InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); - InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); - BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); - return Builder.saveIP(); } @@ -5740,6 +5759,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry"); Builder.SetInsertPoint(BodyBB, BodyBB->begin()); } + InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin()); // The current basic block is split into four basic blocks. After outlining, // they will be mapped as follows: @@ -5763,84 +5783,62 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *AllocaBB = splitBB(Builder, /*CreateBranch=*/true, "teams.alloca"); + // Generate the body of teams. 
+ InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); + InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); + BodyGenCB(AllocaIP, CodeGenIP); + OutlineInfo OI; OI.EntryBB = AllocaBB; OI.ExitBB = ExitBB; OI.OuterAllocaBB = &OuterAllocaBB; - OI.PostOutlineCB = [this, Ident](Function &OutlinedFn) { - // The input IR here looks like the following- - // ``` - // func @current_fn() { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - // - // This is changed to the following- - // - // ``` - // func @current_fn() { - // runtime_call(..., wrapper_fn, ...) - // } - // func @wrapper_fn(..., %args) { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` + // Insert fake values for global tid and bound tid. + std::stack ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true)); + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true)); + + OI.PostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable { // The stale call instruction will be replaced with a new call instruction - // for runtime call with a wrapper function. + // for runtime call with the outlined function. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); + ToBeDeleted.push(StaleCI); + + assert((OutlinedFn.arg_size() == 2 || OutlinedFn.arg_size() == 3) && + "Outlined function must have two or three arguments only"); - // Create the wrapper function. - SmallVector WrapperArgTys{Builder.getPtrTy(), Builder.getPtrTy()}; - for (auto &Arg : OutlinedFn.args()) - WrapperArgTys.push_back(Arg.getType()); - FunctionCallee WrapperFuncVal = M.getOrInsertFunction( - (Twine(OutlinedFn.getName()) + ".teams").str(), - FunctionType::get(Builder.getVoidTy(), WrapperArgTys, false)); - Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); - WrapperFunc->getArg(0)->setName("global_tid"); - WrapperFunc->getArg(1)->setName("bound_tid"); - if (WrapperFunc->arg_size() > 2) - WrapperFunc->getArg(2)->setName("data"); - - // Emit the body of the wrapper function - just a call to outlined function - // and return statement. - BasicBlock *WrapperEntryBB = - BasicBlock::Create(M.getContext(), "entrybb", WrapperFunc); - Builder.SetInsertPoint(WrapperEntryBB); - SmallVector Args; - for (size_t ArgIndex = 2; ArgIndex < WrapperFunc->arg_size(); ArgIndex++) - Args.push_back(WrapperFunc->getArg(ArgIndex)); - Builder.CreateCall(&OutlinedFn, Args); - Builder.CreateRetVoid(); - - OutlinedFn.addFnAttr(Attribute::AttrKind::AlwaysInline); + bool HasShared = OutlinedFn.arg_size() == 3; + + OutlinedFn.getArg(0)->setName("global.tid.ptr"); + OutlinedFn.getArg(1)->setName("bound.tid.ptr"); + if (HasShared) + OutlinedFn.getArg(2)->setName("data"); // Call to the runtime function for teams in the current function. 
    assert(StaleCI && "Error while outlining - no CallInst user found for the "
                      "outlined function.");
    Builder.SetInsertPoint(StaleCI);
-    Args = {Ident, Builder.getInt32(StaleCI->arg_size()), WrapperFunc};
-    for (Use &Arg : StaleCI->args())
-      Args.push_back(Arg);
+    SmallVector<Value *> Args = {Ident, Builder.getInt32(StaleCI->arg_size()),
+                                 &OutlinedFn};
+    if (HasShared)
+      Args.push_back(StaleCI->getArgOperand(2));
     Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                            omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
                        Args);
-    StaleCI->eraseFromParent();
+
+    while (!ToBeDeleted.empty()) {
+      ToBeDeleted.top()->eraseFromParent();
+      ToBeDeleted.pop();
+    }
   };

   addOutlineInfo(std::move(OI));

-  // Generate the body of teams.
-  InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
-  InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
-  BodyGenCB(AllocaIP, CodeGenIP);
-
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());

   return Builder.saveIP();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 643b34270c01693..c4b0389c89c7c60 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -4057,25 +4057,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) {
   ASSERT_NE(SrcSrc, nullptr);

   // Verify the outlined function signature.
-  Function *WrapperFn =
+  Function *OutlinedFn =
       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
-  ASSERT_NE(WrapperFn, nullptr);
-  EXPECT_FALSE(WrapperFn->isDeclaration());
-  EXPECT_TRUE(WrapperFn->arg_size() >= 3);
-  EXPECT_EQ(WrapperFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
-  EXPECT_EQ(WrapperFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
-  EXPECT_EQ(WrapperFn->getArg(2)->getType(),
+  ASSERT_NE(OutlinedFn, nullptr);
+  EXPECT_FALSE(OutlinedFn->isDeclaration());
+  EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
+  EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
+  EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
+  EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
             Builder.getPtrTy()); // captured args

   // Check for TruncInst and ICmpInst in the outlined function.
-  inst_range Instructions = instructions(WrapperFn);
-  auto OutlinedFnInst = find_if(
-      Instructions, [](Instruction &Inst) { return isa<CallInst>(&Inst); });
-  ASSERT_NE(OutlinedFnInst, Instructions.end());
-  CallInst *OutlinedFnCI = dyn_cast<CallInst>(&*OutlinedFnInst);
-  ASSERT_NE(OutlinedFnCI, nullptr);
-  Function *OutlinedFn = OutlinedFnCI->getCalledFunction();
-
   EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) {
     return isa<TruncInst>(&inst);
   }));
   EXPECT_TRUE(any_of(instructions(OutlinedFn),
                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));

From lldb-commits at lists.llvm.org  Mon Oct  2 07:23:50 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Mon, 02 Oct 2023 07:23:50 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in
 `createTask` (PR #67723)
In-Reply-To:
Message-ID: <651ad276.630a0220.c5db.4964@mx.google.com>

https://github.com/shraiysh updated
https://github.com/llvm/llvm-project/pull/67723
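
For orientation, here is a minimal sketch of how a caller drives
`OpenMPIRBuilder::createTask` after this change, modeled on the CreateTask*
unit tests in the patch above. OMPBuilder, Builder, DL, and AllocaIP are
assumed to be set up as in OpenMPIRBuilderTest.cpp; apart from the createTask
signature itself, all names here are illustrative.

using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

// The builder calls this back with the task's own alloca and code-generation
// insertion points; the task body is emitted at CodeGenIP.
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
  // ... emit the task region here ...
};

// With this patch, the function outlined from the region is handed directly
// to __kmpc_omp_task_alloc as the task entry (no wrapper is emitted), with
// the global thread id as its first argument.
llvm::OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
                                        /*Tied=*/true, /*Final=*/nullptr,
                                        /*IfCondition=*/nullptr,
                                        /*Dependencies=*/{}));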
This also fixes the global thread ID argument, which should be used whenever `kmpc_global_thread_num()` is called inside the outlined function. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 106 ++++++++---------- .../Frontend/OpenMPIRBuilderTest.cpp | 56 +++++---- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 51 +++------ 3 files changed, 99 insertions(+), 114 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9c70d384e55db2b..54012b488c6b671 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -1496,6 +1497,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { + // We create a temporary i32 value that will represent the global tid after + // outlining. + SmallVector ToBeDeleted; + Builder.restoreIP(AllocaIP); + AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); + LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"); + ToBeDeleted.append({TID, TIDAddr}); + if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1523,41 +1532,27 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, BasicBlock *TaskAllocaBB = splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); + // Fake use of TID + Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); + BinaryOperator *AddInst = + dyn_cast(Builder.CreateAdd(TID, Builder.getInt32(10))); + ToBeDeleted.push_back(AddInst); + OutlineInfo OI; OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, - Dependencies](Function &OutlinedFn) { - // The input IR here looks like the following- - // ``` - // func @current_fn() { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - // - // This is changed to the following- - // - // ``` - // func @current_fn() { - // runtime_call(..., wrapper_fn, ...) - // } - // func @wrapper_fn(..., %args) { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - - // The stale call instruction will be replaced with a new call instruction - // for runtime call with a wrapper function. + OI.ExcludeArgsFromAggregate = {TID}; + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) { + // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); // HasShareds is true if any variables are captured in the outlined region, // false otherwise. 
- bool HasShareds = StaleCI->arg_size() > 0; + bool HasShareds = StaleCI->arg_size() > 1; Builder.SetInsertPoint(StaleCI); // Gather the arguments for emitting the runtime call for @@ -1595,7 +1590,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Value *SharedsSize = Builder.getInt64(0); if (HasShareds) { AllocaInst *ArgStructAlloca = - dyn_cast(StaleCI->getArgOperand(0)); + dyn_cast(StaleCI->getArgOperand(1)); assert(ArgStructAlloca && "Unable to find the alloca instruction corresponding to arguments " "for extracted function"); @@ -1606,31 +1601,17 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, SharedsSize = Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); } - - // Argument - task_entry (the wrapper function) - // If the outlined function has some captured variables (i.e. HasShareds is - // true), then the wrapper function will have an additional argument (the - // struct containing captured variables). Otherwise, no such argument will - // be present. - SmallVector WrapperArgTys{Builder.getInt32Ty()}; - if (HasShareds) - WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType()); - FunctionCallee WrapperFuncVal = M.getOrInsertFunction( - (Twine(OutlinedFn.getName()) + ".wrapper").str(), - FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false)); - Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); - // Emit the @__kmpc_omp_task_alloc runtime call // The runtime call returns a pointer to an area where the task captured // variables must be copied before the task is run (TaskData) CallInst *TaskData = Builder.CreateCall( TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags, /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize, - /*task_func=*/WrapperFunc}); + /*task_func=*/&OutlinedFn}); // Copy the arguments for outlined function if (HasShareds) { - Value *Shareds = StaleCI->getArgOperand(0); + Value *Shareds = StaleCI->getArgOperand(1); Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData); Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment, @@ -1697,10 +1678,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, if (IfCondition) { // `SplitBlockAndInsertIfThenElse` requires the block to have a // terminator. 
- BasicBlock *NewBasicBlock = - splitBB(Builder, /*CreateBranch=*/true, "if.end"); + splitBB(Builder, /*CreateBranch=*/true, "if.end"); Instruction *IfTerminator = - NewBasicBlock->getSinglePredecessor()->getTerminator(); + Builder.GetInsertPoint()->getParent()->getTerminator(); Instruction *ThenTI = IfTerminator, *ElseTI = nullptr; Builder.SetInsertPoint(IfTerminator); SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI, @@ -1711,10 +1691,12 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Function *TaskCompleteFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0); Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData}); + CallInst *CI = nullptr; if (HasShareds) - Builder.CreateCall(WrapperFunc, {ThreadID, TaskData}); + CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData}); else - Builder.CreateCall(WrapperFunc, {ThreadID}); + CI = Builder.CreateCall(&OutlinedFn, {ThreadID}); + CI->setDebugLoc(StaleCI->getDebugLoc()); Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData}); Builder.SetInsertPoint(ThenTI); } @@ -1736,18 +1718,28 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, StaleCI->eraseFromParent(); - // Emit the body for wrapper function - BasicBlock *WrapperEntryBB = - BasicBlock::Create(M.getContext(), "", WrapperFunc); - Builder.SetInsertPoint(WrapperEntryBB); + Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); if (HasShareds) { - llvm::Value *Shareds = - Builder.CreateLoad(VoidPtr, WrapperFunc->getArg(1)); - Builder.CreateCall(&OutlinedFn, {Shareds}); - } else { - Builder.CreateCall(&OutlinedFn); + LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1)); + OutlinedFn.getArg(1)->replaceUsesWithIf( + Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); + } + + // Replace kmpc_global_thread_num() calls with the global thread id + // argument. 
+ OutlinedFn.getArg(0)->setName("global.tid"); + FunctionCallee TIDRTLFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); + for (Instruction &Inst : instructions(OutlinedFn)) { + CallInst *CI = dyn_cast(&Inst); + if (!CI) + continue; + if (CI->getCalledFunction() == TIDRTLFn.getCallee()) + CI->replaceAllUsesWith(OutlinedFn.getArg(0)); } - Builder.CreateRet(Builder.getInt32(0)); + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fd524f6067ee0ea..643b34270c01693 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5486,25 +5486,28 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { 24); // 64-bit pointer + 128-bit integer // Verify Wrapper function - Function *WrapperFunc = + Function *OutlinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); - ASSERT_NE(WrapperFunc, nullptr); + ASSERT_NE(OutlinedFn, nullptr); - LoadInst *SharedsLoad = dyn_cast(WrapperFunc->begin()->begin()); + LoadInst *SharedsLoad = dyn_cast(OutlinedFn->begin()->begin()); ASSERT_NE(SharedsLoad, nullptr); - EXPECT_EQ(SharedsLoad->getPointerOperand(), WrapperFunc->getArg(1)); - - EXPECT_FALSE(WrapperFunc->isDeclaration()); - CallInst *OutlinedFnCall = - dyn_cast(++WrapperFunc->begin()->begin()); - ASSERT_NE(OutlinedFnCall, nullptr); - EXPECT_EQ(WrapperFunc->getArg(0)->getType(), Builder.getInt32Ty()); - EXPECT_EQ(OutlinedFnCall->getArgOperand(0), - WrapperFunc->getArg(1)->uses().begin()->getUser()); + EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1)); + + EXPECT_FALSE(OutlinedFn->isDeclaration()); + EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty()); + + // Verify that the data argument is used only once, and that too in the load + // instruction that is then used for accessing shared data. + Value *DataPtr = OutlinedFn->getArg(1); + EXPECT_EQ(DataPtr->getNumUses(), 1); + EXPECT_TRUE(isa(DataPtr->uses().begin()->getUser())); + Value *Data = DataPtr->uses().begin()->getUser(); + EXPECT_TRUE(all_of(Data->uses(), [](Use &U) { + return isa(U.getUser()); + })); // Verify the presence of `trunc` and `icmp` instructions in Outlined function - Function *OutlinedFn = OutlinedFnCall->getCalledFunction(); - ASSERT_NE(OutlinedFn, nullptr); EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) { return isa(&inst); })); EXPECT_TRUE(any_of(instructions(OutlinedFn), @@ -5547,6 +5550,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { Builder.CreateRetVoid(); EXPECT_FALSE(verifyModule(*M, &errs())); + + // Check that the outlined function has only one argument. 
+ CallInst *TaskAllocCall = dyn_cast( + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) + ->user_back()); + Function *OutlinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)); + ASSERT_NE(OutlinedFn, nullptr); + ASSERT_EQ(OutlinedFn->arg_size(), 1); } TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { @@ -5658,8 +5669,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { F->setName("func"); IRBuilder<> Builder(BB); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; - IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); Value *Final = Builder.CreateICmp( CmpInst::Predicate::ICMP_EQ, F->getArg(0), @@ -5711,8 +5722,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { F->setName("func"); IRBuilder<> Builder(BB); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; - IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); Value *IfCondition = Builder.CreateICmp( CmpInst::Predicate::ICMP_EQ, F->getArg(0), @@ -5758,15 +5769,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { ->user_back()); ASSERT_NE(TaskBeginIfCall, nullptr); ASSERT_NE(TaskCompleteCall, nullptr); - Function *WrapperFunc = + Function *OulinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); - ASSERT_NE(WrapperFunc, nullptr); - CallInst *WrapperFuncCall = dyn_cast(WrapperFunc->user_back()); - ASSERT_NE(WrapperFuncCall, nullptr); + ASSERT_NE(OulinedFn, nullptr); + CallInst *OulinedFnCall = dyn_cast(OulinedFn->user_back()); + ASSERT_NE(OulinedFnCall, nullptr); EXPECT_EQ(TaskBeginIfCall->getParent(), IfConditionBranchInst->getSuccessor(1)); - EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), WrapperFuncCall); - EXPECT_EQ(WrapperFuncCall->getNextNonDebugInstruction(), TaskCompleteCall); + + EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); + EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); } TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 28b0113a19d61b8..2cd561cb021075f 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2209,7 +2209,7 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, - // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) omp.task { %n = llvm.mlir.constant(1 : i64) : i64 @@ -2222,7 +2222,7 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { llvm.return } -// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]]) // CHECK: task.alloca{{.*}}: // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: @@ -2236,12 +2236,6 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: [[exit_stub]]: // CHECK: ret void - -// 
CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { -// CHECK: call void @[[outlined_fn]]() -// CHECK: ret i32 0 -// CHECK: } - // ----- // CHECK-LABEL: define void @omp_task_with_deps @@ -2259,7 +2253,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, - // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]], {{.*}}) omp.task depend(taskdependin -> %zaddr : !llvm.ptr) { %n = llvm.mlir.constant(1 : i64) : i64 @@ -2272,7 +2266,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { llvm.return } -// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]]) // CHECK: task.alloca{{.*}}: // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: @@ -2286,11 +2280,6 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: [[exit_stub]]: // CHECK: ret void -// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { -// CHECK: call void @[[outlined_fn]]() -// CHECK: ret i32 0 -// CHECK: } - // ----- // CHECK-LABEL: define void @omp_task @@ -2304,7 +2293,7 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 16, - // CHECK-SAME: ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: ptr @[[outlined_fn:.+]]) // CHECK: %[[shareds:.+]] = load ptr, ptr %[[task_data]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.+}} %[[shareds]], ptr {{.+}}, i64 16, i1 false) // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) @@ -2321,8 +2310,9 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { } } -// CHECK: define internal void @[[outlined_fn:.+]](ptr %[[task_data:.+]]) +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]], ptr %[[task_data:.+]]) // CHECK: task.alloca{{.*}}: +// CHECK: %[[shareds:.+]] = load ptr, ptr %[[task_data]] // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: // CHECK: br label %[[task_region:[^, ]+]] @@ -2333,13 +2323,6 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // CHECK: [[exit_stub]]: // CHECK: ret void - -// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, ptr %[[task_data:.+]]) { -// CHECK: %[[shareds:.+]] = load ptr, ptr %1, align 8 -// CHECK: call void @[[outlined_fn]](ptr %[[shareds]]) -// CHECK: ret i32 0 -// CHECK: } - // ----- llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { @@ -2355,14 +2338,12 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { } // CHECK-LABEL: @par_task_ -// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @par_task_..omp_par.wrapper) +// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]]) -// CHECK-LABEL: define internal void @par_task_..omp_par +// CHECK: define internal void @[[task_outlined_fn]] // CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8 -// CHECK: call void ({{.*}}) 
@__kmpc_fork_call({{.*}}, ptr @par_task_..omp_par..omp_par, ptr %[[ARG_ALLOC]]) -// CHECK: define internal void @par_task_..omp_par..omp_par -// CHECK: define i32 @par_task_..omp_par.wrapper -// CHECK: call void @par_task_..omp_par +// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]]) +// CHECK: define internal void @[[parallel_outlined_fn]] // ----- llvm.func @foo() -> () @@ -2432,7 +2413,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 40, i64 0, ptr @omp_taskgroup_task..omp_par.wrapper) +// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 40, i64 0, ptr @[[outlined_task_fn:.+]]) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], ptr %[[t1_alloc]]) // CHECK: br label %[[task_exit:[^,]+]] // CHECK: [[task_exit]]: @@ -2445,7 +2426,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: %[[gep3:.+]] = getelementptr { i32, i32, ptr }, ptr %[[structArg]], i32 0, i32 2 // CHECK: store ptr %[[zaddr]], ptr %[[gep3]], align 8 // CHECK: %[[omp_global_thread_num_t2:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 40, i64 16, ptr @omp_taskgroup_task..omp_par.1.wrapper) +// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 40, i64 16, ptr @[[outlined_task_fn:.+]]) // CHECK: %[[shareds:.+]] = load ptr, ptr %[[t2_alloc]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[shareds]], ptr align 1 %[[structArg]], i64 16, i1 false) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], ptr %[[t2_alloc]]) @@ -2617,7 +2598,7 @@ llvm.func @omp_task_final(%boolexpr: i1) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) // CHECK: %[[final_flag:.+]] = select i1 %[[boolexpr]], i32 2, i32 0 // CHECK: %[[task_flags:.+]] = or i32 %[[final_flag]], 1 -// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 %[[task_flags]], i64 40, i64 0, ptr @omp_task_final..omp_par.wrapper) +// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 %[[task_flags]], i64 40, i64 0, ptr @[[task_outlined_fn:.+]]) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[task_exit:[^,]+]] // CHECK: [[task_exit]]: @@ -2648,14 +2629,14 @@ llvm.func @omp_task_if(%boolexpr: i1) { // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 0, ptr @omp_task_if..omp_par.wrapper) +// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 0, ptr @[[task_outlined_fn:.+]]) // CHECK: br i1 %[[boolexpr]], label 
%[[true_label:[^,]+]], label %[[false_label:[^,]+]] // CHECK: [[true_label]]: // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit:[^,]+]] // CHECK: [[false_label:[^,]+]]: ; preds = %codeRepl // CHECK: call void @__kmpc_omp_task_begin_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) -// CHECK: %{{.+}} = call i32 @omp_task_if..omp_par.wrapper(i32 %[[omp_global_thread_num]]) +// CHECK: call void @[[task_outlined_fn]](i32 %[[omp_global_thread_num]]) // CHECK: call void @__kmpc_omp_task_complete_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit]] // CHECK: [[if_else_exit]]: >From a1a9438b5e00170030b419a7736053422745cbc6 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:22:30 -0500 Subject: [PATCH 2/2] Remove outlining for teams too. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 178 +++++++++--------- .../Frontend/OpenMPIRBuilderTest.cpp | 22 +-- 2 files changed, 95 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 54012b488c6b671..a5a73bcc10c48e3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -341,6 +341,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions. +Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { + FakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); + ToBeDeleted.push(FakeVal); + } + + // We only need TIDAddr and ZeroAddr for modeling purposes to get the + // associated arguments in the outlined function, so we delete them later. + + // Fake use of TID + Builder.restoreIP(InnerAllocaIP); + Instruction *UseFakeVal; + if (AsPtr) + UseFakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use"); + else + UseFakeVal = + cast(Builder.CreateAdd(FakeVal, Builder.getInt32(10))); + ToBeDeleted.push(UseFakeVal); + return FakeVal; +} + //===----------------------------------------------------------------------===// // OpenMPIRBuilderConfig //===----------------------------------------------------------------------===// @@ -1497,13 +1535,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { - // We create a temporary i32 value that will represent the global tid after - // outlining. 
-  SmallVector<Instruction *> ToBeDeleted;
-  Builder.restoreIP(AllocaIP);
-  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
-  LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use");
-  ToBeDeleted.append({TID, TIDAddr});

   if (!updateToLocation(Loc))
     return InsertPointTy();

@@ -1532,19 +1563,24 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
   BasicBlock *TaskAllocaBB =
       splitBB(Builder, /*CreateBranch=*/true, "task.alloca");

-  // Fake use of TID
-  Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
-  BinaryOperator *AddInst =
-      dyn_cast<BinaryOperator>(Builder.CreateAdd(TID, Builder.getInt32(10)));
-  ToBeDeleted.push_back(AddInst);
+  InsertPointTy TaskAllocaIP =
+      InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
+  InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
+  BodyGenCB(TaskAllocaIP, TaskBodyIP);
+  Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());

   OutlineInfo OI;
   OI.EntryBB = TaskAllocaBB;
   OI.OuterAllocaBB = AllocaIP.getBlock();
   OI.ExitBB = TaskExitBB;
-  OI.ExcludeArgsFromAggregate = {TID};
+
+  // Add the thread ID argument.
+  std::stack<Instruction *> ToBeDeleted;
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
+
   OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
-                      TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) {
+                      TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
     // Replace the Stale CI by appropriate RTL function call.
     assert(OutlinedFn.getNumUses() == 1 &&
            "there must be a single user for the outlined function");
@@ -1670,7 +1706,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
     //   br label %exit
     // else:
     //   call @__kmpc_omp_task_begin_if0(...)
-    //   call @wrapper_fn(...)
+    //   call @outlined_fn(...)
     //   call @__kmpc_omp_task_complete_if0(...)
     //   br label %exit
     // exit:
@@ -1725,31 +1761,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
           Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
     }

-    // Replace kmpc_global_thread_num() calls with the global thread id
-    // argument.
-    OutlinedFn.getArg(0)->setName("global.tid");
-    FunctionCallee TIDRTLFn =
-        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
-    for (Instruction &Inst : instructions(OutlinedFn)) {
-      CallInst *CI = dyn_cast<CallInst>(&Inst);
-      if (!CI)
-        continue;
-      if (CI->getCalledFunction() == TIDRTLFn.getCallee())
-        CI->replaceAllUsesWith(OutlinedFn.getArg(0));
+    while (!ToBeDeleted.empty()) {
+      ToBeDeleted.top()->eraseFromParent();
+      ToBeDeleted.pop();
     }
-
-    for (Instruction *I : ToBeDeleted)
-      I->eraseFromParent();
   };

   addOutlineInfo(std::move(OI));

-  InsertPointTy TaskAllocaIP =
-      InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
-  InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
-  BodyGenCB(TaskAllocaIP, TaskBodyIP);
-  Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());
-
   return Builder.saveIP();
 }

@@ -5740,6 +5759,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
     BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry");
     Builder.SetInsertPoint(BodyBB, BodyBB->begin());
   }
+  InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin());

   // The current basic block is split into four basic blocks. After outlining,
   // they will be mapped as follows:
@@ -5763,84 +5783,62 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
   BasicBlock *AllocaBB =
       splitBB(Builder, /*CreateBranch=*/true, "teams.alloca");

+  // Generate the body of teams.
+  InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
+  InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
+  BodyGenCB(AllocaIP, CodeGenIP);
+
   OutlineInfo OI;
   OI.EntryBB = AllocaBB;
   OI.ExitBB = ExitBB;
   OI.OuterAllocaBB = &OuterAllocaBB;
-  OI.PostOutlineCB = [this, Ident](Function &OutlinedFn) {
-    // The input IR here looks like the following-
-    // ```
-    // func @current_fn() {
-    //   outlined_fn(%args)
-    // }
-    // func @outlined_fn(%args) { ... }
-    // ```
-    //
-    // This is changed to the following-
-    //
-    // ```
-    // func @current_fn() {
-    //   runtime_call(..., wrapper_fn, ...)
-    // }
-    // func @wrapper_fn(..., %args) {
-    //   outlined_fn(%args)
-    // }
-    // func @outlined_fn(%args) { ... }
-    // ```
+  // Insert fake values for global tid and bound tid.
+  std::stack<Instruction *> ToBeDeleted;
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
+
+  OI.PostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable {
     // The stale call instruction will be replaced with a new call instruction
-    // for runtime call with a wrapper function.
+    // for runtime call with the outlined function.

     assert(OutlinedFn.getNumUses() == 1 &&
            "there must be a single user for the outlined function");
     CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+    ToBeDeleted.push(StaleCI);
+
+    assert((OutlinedFn.arg_size() == 2 || OutlinedFn.arg_size() == 3) &&
+           "Outlined function must have two or three arguments only");

-    // Create the wrapper function.
-    SmallVector<Type *> WrapperArgTys{Builder.getPtrTy(), Builder.getPtrTy()};
-    for (auto &Arg : OutlinedFn.args())
-      WrapperArgTys.push_back(Arg.getType());
-    FunctionCallee WrapperFuncVal = M.getOrInsertFunction(
-        (Twine(OutlinedFn.getName()) + ".teams").str(),
-        FunctionType::get(Builder.getVoidTy(), WrapperArgTys, false));
-    Function *WrapperFunc = dyn_cast<Function>(WrapperFuncVal.getCallee());
-    WrapperFunc->getArg(0)->setName("global_tid");
-    WrapperFunc->getArg(1)->setName("bound_tid");
-    if (WrapperFunc->arg_size() > 2)
-      WrapperFunc->getArg(2)->setName("data");
-
-    // Emit the body of the wrapper function - just a call to outlined function
-    // and return statement.
-    BasicBlock *WrapperEntryBB =
-        BasicBlock::Create(M.getContext(), "entrybb", WrapperFunc);
-    Builder.SetInsertPoint(WrapperEntryBB);
-    SmallVector<Value *> Args;
-    for (size_t ArgIndex = 2; ArgIndex < WrapperFunc->arg_size(); ArgIndex++)
-      Args.push_back(WrapperFunc->getArg(ArgIndex));
-    Builder.CreateCall(&OutlinedFn, Args);
-    Builder.CreateRetVoid();
-
-    OutlinedFn.addFnAttr(Attribute::AttrKind::AlwaysInline);
+    bool HasShared = OutlinedFn.arg_size() == 3;
+
+    OutlinedFn.getArg(0)->setName("global.tid.ptr");
+    OutlinedFn.getArg(1)->setName("bound.tid.ptr");
+    if (HasShared)
+      OutlinedFn.getArg(2)->setName("data");

     // Call to the runtime function for teams in the current function.
     assert(StaleCI && "Error while outlining - no CallInst user found for the "
                       "outlined function.");
     Builder.SetInsertPoint(StaleCI);
-    Args = {Ident, Builder.getInt32(StaleCI->arg_size()), WrapperFunc};
-    for (Use &Arg : StaleCI->args())
-      Args.push_back(Arg);
+    SmallVector<Value *> Args = {Ident, Builder.getInt32(StaleCI->arg_size()),
+                                 &OutlinedFn};
+    if (HasShared)
+      Args.push_back(StaleCI->getArgOperand(2));
     Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                            omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
                        Args);
-    StaleCI->eraseFromParent();
+
+    while (!ToBeDeleted.empty()) {
+      ToBeDeleted.top()->eraseFromParent();
+      ToBeDeleted.pop();
+    }
   };

   addOutlineInfo(std::move(OI));

-  // Generate the body of teams.
-  InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
-  InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
-  BodyGenCB(AllocaIP, CodeGenIP);
-
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());

   return Builder.saveIP();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 643b34270c01693..c4b0389c89c7c60 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -4057,25 +4057,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) {
   ASSERT_NE(SrcSrc, nullptr);

   // Verify the outlined function signature.
-  Function *WrapperFn =
+  Function *OutlinedFn =
       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
-  ASSERT_NE(WrapperFn, nullptr);
-  EXPECT_FALSE(WrapperFn->isDeclaration());
-  EXPECT_TRUE(WrapperFn->arg_size() >= 3);
-  EXPECT_EQ(WrapperFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
-  EXPECT_EQ(WrapperFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
-  EXPECT_EQ(WrapperFn->getArg(2)->getType(),
+  ASSERT_NE(OutlinedFn, nullptr);
+  EXPECT_FALSE(OutlinedFn->isDeclaration());
+  EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
+  EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
+  EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
+  EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
             Builder.getPtrTy()); // captured args

   // Check for TruncInst and ICmpInst in the outlined function.
-  inst_range Instructions = instructions(WrapperFn);
-  auto OutlinedFnInst = find_if(
-      Instructions, [](Instruction &Inst) { return isa<CallInst>(&Inst); });
-  ASSERT_NE(OutlinedFnInst, Instructions.end());
-  CallInst *OutlinedFnCI = dyn_cast<CallInst>(&*OutlinedFnInst);
-  ASSERT_NE(OutlinedFnCI, nullptr);
-  Function *OutlinedFn = OutlinedFnCI->getCalledFunction();
-
   EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) {
     return isa<TruncInst>(&inst);
   }));
   EXPECT_TRUE(any_of(instructions(OutlinedFn),

From lldb-commits at lists.llvm.org Mon Oct 2 07:27:46 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Mon, 02 Oct 2023 07:27:46 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in
 `createTask` (PR #67723)
In-Reply-To: 
Message-ID: <651ad362.170a0220.9be6.e4c6@mx.google.com>

https://github.com/shraiysh updated https://github.com/llvm/llvm-project/pull/67723

>From 6aabc3c10ea2d587120b74966b7ce96f9b8167af Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay
Date: Thu, 28 Sep 2023 13:35:07 -0500
Subject: [PATCH 1/3] [OpenMPIRBuilder] Remove wrapper function in `createTask`

This patch removes the wrapper function in `OpenMPIRBuilder::createTask`.
The outlined function is directly of the form that is expected by the
runtime library calls.
This also fixes the global thread ID argument, which should be used
whenever `kmpc_global_thread_num()` is called inside the outlined function.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp  | 106 ++++++++----------
 .../Frontend/OpenMPIRBuilderTest.cpp       |  56 +++++----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir   |  51 +++------
 3 files changed, 99 insertions(+), 114 deletions(-)

>From a1a9438b5e00170030b419a7736053422745cbc6 Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay
Date: Mon, 2 Oct 2023 09:22:30 -0500
Subject: [PATCH 2/3] Remove outlining for teams too.

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp  | 178 +++++++++---------
 .../Frontend/OpenMPIRBuilderTest.cpp       |  22 +--
 2 files changed, 95 insertions(+), 105 deletions(-)

>From 4b71558a1936983e1eeebfee98de6b4d8f1062cc Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay
Date: Mon, 2 Oct 2023 09:26:57 -0500
Subject: [PATCH 3/3] Remove unintentional include for InstIterator.h

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a5a73bcc10c48e3..f62d244a2dc4c68 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -35,7 +35,6 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"

From lldb-commits at lists.llvm.org Mon Oct 2 07:28:30 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Mon, 02 Oct 2023 07:28:30 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in
 `createTask` (PR #67723)
In-Reply-To: 
Message-ID: <651ad38e.a70a0220.30773.4a32@mx.google.com>

https://github.com/shraiysh edited https://github.com/llvm/llvm-project/pull/67723

From lldb-commits at lists.llvm.org Mon Oct 2 07:51:02 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Mon, 02 Oct 2023 07:51:02 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in
 `createTask`, `createTeams` (PR #67723)
In-Reply-To: 
Message-ID: <651ad8d6.a70a0220.b6df6.4df6@mx.google.com>

https://github.com/shraiysh updated https://github.com/llvm/llvm-project/pull/67723

>From 6aabc3c10ea2d587120b74966b7ce96f9b8167af Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay
Date: Thu, 28 Sep 2023 13:35:07 -0500
Subject: [PATCH 1/4] [OpenMPIRBuilder] Remove wrapper function in `createTask`

This patch removes the wrapper function in `OpenMPIRBuilder::createTask`.
The outlined function is directly of the form that is expected by the
runtime library calls. This also fixes the global thread ID argument,
which should be used whenever `kmpc_global_thread_num()` is called inside
the outlined function.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp  | 106 ++++++++----------
 .../Frontend/OpenMPIRBuilderTest.cpp       |  56 +++++----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir   |  51 +++------
 3 files changed, 99 insertions(+), 114 deletions(-)
%[[true_label:[^,]+]], label %[[false_label:[^,]+]] // CHECK: [[true_label]]: // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit:[^,]+]] // CHECK: [[false_label:[^,]+]]: ; preds = %codeRepl // CHECK: call void @__kmpc_omp_task_begin_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) -// CHECK: %{{.+}} = call i32 @omp_task_if..omp_par.wrapper(i32 %[[omp_global_thread_num]]) +// CHECK: call void @[[task_outlined_fn]](i32 %[[omp_global_thread_num]]) // CHECK: call void @__kmpc_omp_task_complete_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit]] // CHECK: [[if_else_exit]]: >From a1a9438b5e00170030b419a7736053422745cbc6 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:22:30 -0500 Subject: [PATCH 2/4] Remove outlining for teams too. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 178 +++++++++--------- .../Frontend/OpenMPIRBuilderTest.cpp | 22 +-- 2 files changed, 95 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 54012b488c6b671..a5a73bcc10c48e3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -341,6 +341,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions. +Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { + FakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); + ToBeDeleted.push(FakeVal); + } + + // We only need TIDAddr and ZeroAddr for modeling purposes to get the + // associated arguments in the outlined function, so we delete them later. + + // Fake use of TID + Builder.restoreIP(InnerAllocaIP); + Instruction *UseFakeVal; + if (AsPtr) + UseFakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use"); + else + UseFakeVal = + cast(Builder.CreateAdd(FakeVal, Builder.getInt32(10))); + ToBeDeleted.push(UseFakeVal); + return FakeVal; +} + //===----------------------------------------------------------------------===// // OpenMPIRBuilderConfig //===----------------------------------------------------------------------===// @@ -1497,13 +1535,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { - // We create a temporary i32 value that will represent the global tid after - // outlining. 
- SmallVector ToBeDeleted; - Builder.restoreIP(AllocaIP); - AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); - LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"); - ToBeDeleted.append({TID, TIDAddr}); if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1532,19 +1563,24 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, BasicBlock *TaskAllocaBB = splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); - // Fake use of TID - Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); - BinaryOperator *AddInst = - dyn_cast(Builder.CreateAdd(TID, Builder.getInt32(10))); - ToBeDeleted.push_back(AddInst); + InsertPointTy TaskAllocaIP = + InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); + InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); + BodyGenCB(TaskAllocaIP, TaskBodyIP); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); OutlineInfo OI; OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.ExcludeArgsFromAggregate = {TID}; + + // Add the thread ID argument. + std::stack ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, - TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) { + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable { // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); @@ -1670,7 +1706,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, // br label %exit // else: // call @__kmpc_omp_task_begin_if0(...) - // call @wrapper_fn(...) + // call @outlined_fn(...) // call @__kmpc_omp_task_complete_if0(...) // br label %exit // exit: @@ -1725,31 +1761,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); } - // Replace kmpc_global_thread_num() calls with the global thread id - // argument. - OutlinedFn.getArg(0)->setName("global.tid"); - FunctionCallee TIDRTLFn = - getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); - for (Instruction &Inst : instructions(OutlinedFn)) { - CallInst *CI = dyn_cast(&Inst); - if (!CI) - continue; - if (CI->getCalledFunction() == TIDRTLFn.getCallee()) - CI->replaceAllUsesWith(OutlinedFn.getArg(0)); + while (!ToBeDeleted.empty()) { + ToBeDeleted.top()->eraseFromParent(); + ToBeDeleted.pop(); } - - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); - InsertPointTy TaskAllocaIP = - InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); - InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); - BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); - return Builder.saveIP(); } @@ -5740,6 +5759,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry"); Builder.SetInsertPoint(BodyBB, BodyBB->begin()); } + InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin()); // The current basic block is split into four basic blocks. After outlining, // they will be mapped as follows: @@ -5763,84 +5783,62 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *AllocaBB = splitBB(Builder, /*CreateBranch=*/true, "teams.alloca"); + // Generate the body of teams. 
+ InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); + InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); + BodyGenCB(AllocaIP, CodeGenIP); + OutlineInfo OI; OI.EntryBB = AllocaBB; OI.ExitBB = ExitBB; OI.OuterAllocaBB = &OuterAllocaBB; - OI.PostOutlineCB = [this, Ident](Function &OutlinedFn) { - // The input IR here looks like the following- - // ``` - // func @current_fn() { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - // - // This is changed to the following- - // - // ``` - // func @current_fn() { - // runtime_call(..., wrapper_fn, ...) - // } - // func @wrapper_fn(..., %args) { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` + // Insert fake values for global tid and bound tid. + std::stack ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true)); + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true)); + + OI.PostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable { // The stale call instruction will be replaced with a new call instruction - // for runtime call with a wrapper function. + // for runtime call with the outlined function. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); + ToBeDeleted.push(StaleCI); + + assert((OutlinedFn.arg_size() == 2 || OutlinedFn.arg_size() == 3) && + "Outlined function must have two or three arguments only"); - // Create the wrapper function. - SmallVector WrapperArgTys{Builder.getPtrTy(), Builder.getPtrTy()}; - for (auto &Arg : OutlinedFn.args()) - WrapperArgTys.push_back(Arg.getType()); - FunctionCallee WrapperFuncVal = M.getOrInsertFunction( - (Twine(OutlinedFn.getName()) + ".teams").str(), - FunctionType::get(Builder.getVoidTy(), WrapperArgTys, false)); - Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); - WrapperFunc->getArg(0)->setName("global_tid"); - WrapperFunc->getArg(1)->setName("bound_tid"); - if (WrapperFunc->arg_size() > 2) - WrapperFunc->getArg(2)->setName("data"); - - // Emit the body of the wrapper function - just a call to outlined function - // and return statement. - BasicBlock *WrapperEntryBB = - BasicBlock::Create(M.getContext(), "entrybb", WrapperFunc); - Builder.SetInsertPoint(WrapperEntryBB); - SmallVector Args; - for (size_t ArgIndex = 2; ArgIndex < WrapperFunc->arg_size(); ArgIndex++) - Args.push_back(WrapperFunc->getArg(ArgIndex)); - Builder.CreateCall(&OutlinedFn, Args); - Builder.CreateRetVoid(); - - OutlinedFn.addFnAttr(Attribute::AttrKind::AlwaysInline); + bool HasShared = OutlinedFn.arg_size() == 3; + + OutlinedFn.getArg(0)->setName("global.tid.ptr"); + OutlinedFn.getArg(1)->setName("bound.tid.ptr"); + if (HasShared) + OutlinedFn.getArg(2)->setName("data"); // Call to the runtime function for teams in the current function. 
assert(StaleCI && "Error while outlining - no CallInst user found for the " "outlined function."); Builder.SetInsertPoint(StaleCI); - Args = {Ident, Builder.getInt32(StaleCI->arg_size()), WrapperFunc}; - for (Use &Arg : StaleCI->args()) - Args.push_back(Arg); + SmallVector Args = {Ident, Builder.getInt32(StaleCI->arg_size()), + &OutlinedFn}; + if (HasShared) + Args.push_back(StaleCI->getArgOperand(2)); Builder.CreateCall(getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_fork_teams), Args); - StaleCI->eraseFromParent(); + + while (!ToBeDeleted.empty()) { + ToBeDeleted.top()->eraseFromParent(); + ToBeDeleted.pop(); + } }; addOutlineInfo(std::move(OI)); - // Generate the body of teams. - InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); - InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - BodyGenCB(AllocaIP, CodeGenIP); - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Builder.saveIP(); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 643b34270c01693..c4b0389c89c7c60 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -4057,25 +4057,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) { ASSERT_NE(SrcSrc, nullptr); // Verify the outlined function signature. - Function *WrapperFn = + Function *OutlinedFn = dyn_cast(TeamsForkCall->getArgOperand(2)->stripPointerCasts()); - ASSERT_NE(WrapperFn, nullptr); - EXPECT_FALSE(WrapperFn->isDeclaration()); - EXPECT_TRUE(WrapperFn->arg_size() >= 3); - EXPECT_EQ(WrapperFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid - EXPECT_EQ(WrapperFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid - EXPECT_EQ(WrapperFn->getArg(2)->getType(), + ASSERT_NE(OutlinedFn, nullptr); + EXPECT_FALSE(OutlinedFn->isDeclaration()); + EXPECT_TRUE(OutlinedFn->arg_size() >= 3); + EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid + EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid + EXPECT_EQ(OutlinedFn->getArg(2)->getType(), Builder.getPtrTy()); // captured args // Check for TruncInst and ICmpInst in the outlined function. 
- inst_range Instructions = instructions(WrapperFn); - auto OutlinedFnInst = find_if( - Instructions, [](Instruction &Inst) { return isa(&Inst); }); - ASSERT_NE(OutlinedFnInst, Instructions.end()); - CallInst *OutlinedFnCI = dyn_cast(&*OutlinedFnInst); - ASSERT_NE(OutlinedFnCI, nullptr); - Function *OutlinedFn = OutlinedFnCI->getCalledFunction(); - EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) { return isa(&inst); })); EXPECT_TRUE(any_of(instructions(OutlinedFn), >From 4b71558a1936983e1eeebfee98de6b4d8f1062cc Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:26:57 -0500 Subject: [PATCH 3/4] Remove unintentional include for InstIterator.h --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index a5a73bcc10c48e3..f62d244a2dc4c68 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -35,7 +35,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" >From 7c95d29b677c6107f81b0c26c139a34475a6fe81 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:50:26 -0500 Subject: [PATCH 4/4] Fix insertpoint after createTask --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f62d244a2dc4c68..5ed2a345a14dd04 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1566,7 +1566,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); OutlineInfo OI; OI.EntryBB = TaskAllocaBB; @@ -1767,6 +1766,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, }; addOutlineInfo(std::move(OI)); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); return Builder.saveIP(); } From lldb-commits at lists.llvm.org Mon Oct 2 08:06:52 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Mon, 02 Oct 2023 08:06:52 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Update Enum name for CompressedWithHigh (PR #67845) In-Reply-To: Message-ID: <651adc8c.630a0220.366c3.3d23@mx.google.com> https://github.com/yinying-lisa-li closed https://github.com/llvm/llvm-project/pull/67845 From lldb-commits at lists.llvm.org Mon Oct 2 08:08:53 2023 From: lldb-commits at lists.llvm.org (Shafik Yaghmour via lldb-commits) Date: Mon, 02 Oct 2023 08:08:53 -0700 (PDT) Subject: [Lldb-commits] [lldb] [Clang] Fix crash when ill-formed code is treated as a deduction guide (PR #67373) In-Reply-To: Message-ID: <651add05.170a0220.9f151.cedb@mx.google.com> https://github.com/shafik closed https://github.com/llvm/llvm-project/pull/67373 From lldb-commits at lists.llvm.org Mon Oct 2 10:11:57 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 10:11:57 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Export DWARF Parser symbols for external language plugins (PR #67851) In-Reply-To: Message-ID: <651af9dd.170a0220.798b8.e908@mx.google.com> jimingham 
wrote:

So first off, the lldb_private API is not a stable API and will likely never be. After all, it vends lots of llvm ADT types and other llvm & clang APIs which are also not stable APIs... So anything which tends to make these APIs seem like we're vending them as such, or that distributing shared libraries that depend on them is a supported mode, is going in the wrong direction. If we want to make the plugin API a stable API for external customers, we'll have to do a bunch more work (including getting buy-in to make ABI-stable versions of the underlying llvm/clang APIs...)

If you are not planning to make an actual loadable plugin, which I am pretty sure we don't want to support with the lldb_private APIs at this time, then I'm not clear why all this work is necessary, as opposed to just linking to the lldb internal headers & building against the .a files.

https://github.com/llvm/llvm-project/pull/67851

From lldb-commits at lists.llvm.org Mon Oct 2 10:27:04 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Mon, 02 Oct 2023 10:27:04 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB] Export DWARF Parser symbols for external language plugins (PR #67851)
In-Reply-To: 
Message-ID: <651afd68.170a0220.ce622.f281@mx.google.com>

walter-erquinigo wrote:

@jimingham, indeed, I don't want to have a plugin that could be loadable by any build of lldb. The Mojo SDK already distributes its own build of vanilla lldb along with the mojo plugin, which is loaded at runtime and links correctly with that lldb.
I initially tried linking the .a files, but that led to a long list of dependencies, including clang and other libraries that I also needed to link against, which was not very clean and could lead to issues on Windows, where the exports file has a limited size.
In the end, after all the discussion, I think the cleanest solution, one that could benefit other plugin developers as well, is to allow specifying a custom exports file that can be used instead of `third-party/llvm-project/lldb/source/API/liblldb-private.exports`, offering more control over what to export.

https://github.com/llvm/llvm-project/pull/67851

From lldb-commits at lists.llvm.org Mon Oct 2 10:42:48 2023
From: lldb-commits at lists.llvm.org (Shafik Yaghmour via lldb-commits)
Date: Mon, 02 Oct 2023 10:42:48 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains_subrange (PR #66963)
In-Reply-To: 
Message-ID: <651b0118.170a0220.8d6c5.ce2e@mx.google.com>

shafik wrote:

Please make sure you add a description to your PR. This is what usually goes into the git log, and we want those entries to be as descriptive and helpful as possible for folks who read the git logs. Thank you.
https://github.com/llvm/llvm-project/pull/66963 From lldb-commits at lists.llvm.org Mon Oct 2 10:46:21 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 10:46:21 -0700 (PDT) Subject: [Lldb-commits] [lldb] cdd3e96 - [lldb] Replace lldb's DWARFDebugAbbrev implementation with llvm's (#67841) Message-ID: <651b01ed.170a0220.dc698.7c4c@mx.google.com> Author: Alex Langford Date: 2023-10-02T10:46:16-07:00 New Revision: cdd3e964f229aac5366433a549466d18ed696660 URL: https://github.com/llvm/llvm-project/commit/cdd3e964f229aac5366433a549466d18ed696660 DIFF: https://github.com/llvm/llvm-project/commit/cdd3e964f229aac5366433a549466d18ed696660.diff LOG: [lldb] Replace lldb's DWARFDebugAbbrev implementation with llvm's (#67841) The implementations are now close enough that replacing it is trivial. Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h Removed: lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h ################################################################################ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt index dad206040068716..0e4fd5b995d1ba9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt @@ -17,7 +17,6 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN DWARFCompileUnit.cpp DWARFContext.cpp DWARFDataExtractor.cpp - DWARFDebugAbbrev.cpp DWARFDebugAranges.cpp DWARFDebugArangeSet.cpp DWARFDebugInfo.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index ab3017ba0ffcbca..65debac4c7d9265 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -12,6 +12,10 @@ #include "DWARFUnit.h" #include "llvm/Support/Error.h" +namespace llvm { +class DWARFAbbreviationDeclarationSet; +} + class DWARFCompileUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override; @@ -27,7 +31,7 @@ class DWARFCompileUnit : public DWARFUnit { private: DWARFCompileUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, - const DWARFAbbreviationDeclarationSet &abbrevs, + const llvm::DWARFAbbreviationDeclarationSet &abbrevs, DIERef::Section section, bool is_dwo) : DWARFUnit(dwarf, uid, header, abbrevs, section, is_dwo) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp deleted file mode 100644 index f3c2755c5a527cc..000000000000000 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp +++ /dev/null @@ -1,63 +0,0 @@ -//===-- DWARFDebugAbbrev.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "DWARFDebugAbbrev.h" -#include "DWARFDataExtractor.h" -#include "DWARFFormValue.h" -#include "lldb/Utility/Stream.h" - -using namespace lldb; -using namespace lldb_private; - -// DWARFDebugAbbrev constructor -DWARFDebugAbbrev::DWARFDebugAbbrev(const DWARFDataExtractor &data) - : m_abbrevCollMap(), m_prev_abbr_offset_pos(m_abbrevCollMap.end()), - m_data(data.GetAsLLVM()) {} - -// DWARFDebugAbbrev::Parse() -llvm::Error DWARFDebugAbbrev::parse() { - if (!m_data) - return llvm::Error::success(); - - lldb::offset_t offset = 0; - - while (m_data->isValidOffset(offset)) { - uint32_t initial_cu_offset = offset; - DWARFAbbreviationDeclarationSet abbrevDeclSet; - - llvm::Error error = abbrevDeclSet.extract(*m_data, &offset); - if (error) { - m_data = std::nullopt; - return error; - } - - m_abbrevCollMap[initial_cu_offset] = abbrevDeclSet; - } - m_data = std::nullopt; - m_prev_abbr_offset_pos = m_abbrevCollMap.end(); - return llvm::ErrorSuccess(); -} - -// DWARFDebugAbbrev::GetAbbreviationDeclarationSet() -const DWARFAbbreviationDeclarationSet * -DWARFDebugAbbrev::GetAbbreviationDeclarationSet( - dw_offset_t cu_abbr_offset) const { - DWARFAbbreviationDeclarationCollMapConstIter end = m_abbrevCollMap.end(); - DWARFAbbreviationDeclarationCollMapConstIter pos; - if (m_prev_abbr_offset_pos != end && - m_prev_abbr_offset_pos->first == cu_abbr_offset) - return &(m_prev_abbr_offset_pos->second); - else { - pos = m_abbrevCollMap.find(cu_abbr_offset); - m_prev_abbr_offset_pos = pos; - } - - if (pos != m_abbrevCollMap.end()) - return &(pos->second); - return nullptr; -} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h deleted file mode 100644 index d2fade0934c8a88..000000000000000 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h +++ /dev/null @@ -1,55 +0,0 @@ -//===-- DWARFDebugAbbrev.h --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGABBREV_H -#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGABBREV_H - -#include "DWARFDefines.h" -#include "lldb/lldb-private.h" - -#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" - -#include - -using DWARFAbbreviationDeclaration = llvm::DWARFAbbreviationDeclaration; -using DWARFAbbreviationDeclarationSet = llvm::DWARFAbbreviationDeclarationSet; - -typedef std::map - DWARFAbbreviationDeclarationCollMap; -typedef DWARFAbbreviationDeclarationCollMap::iterator - DWARFAbbreviationDeclarationCollMapIter; -typedef DWARFAbbreviationDeclarationCollMap::const_iterator - DWARFAbbreviationDeclarationCollMapConstIter; - -class DWARFDebugAbbrev { -public: - DWARFDebugAbbrev(const lldb_private::DWARFDataExtractor &data); - const DWARFAbbreviationDeclarationSet * - GetAbbreviationDeclarationSet(dw_offset_t cu_abbr_offset) const; - /// Extract all abbreviations for a particular compile unit. Returns - /// llvm::ErrorSuccess() on success, and an appropriate llvm::Error object - /// otherwise. 
- llvm::Error parse(); - - DWARFAbbreviationDeclarationCollMapConstIter begin() const { - assert(!m_data && "Must call parse before iterating over DWARFDebugAbbrev"); - return m_abbrevCollMap.begin(); - } - - DWARFAbbreviationDeclarationCollMapConstIter end() const { - return m_abbrevCollMap.end(); - } - -protected: - DWARFAbbreviationDeclarationCollMap m_abbrevCollMap; - mutable DWARFAbbreviationDeclarationCollMapConstIter m_prev_abbr_offset_pos; - mutable std::optional m_data; -}; - -#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGABBREV_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index a08637aef066978..a6ab83700904cb9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -22,7 +22,6 @@ #include "lldb/Utility/StreamString.h" #include "DWARFCompileUnit.h" -#include "DWARFDebugAbbrev.h" #include "DWARFDebugAranges.h" #include "DWARFDebugInfo.h" #include "DWARFDebugRanges.h" @@ -32,6 +31,8 @@ #include "SymbolFileDWARF.h" #include "SymbolFileDWARFDwo.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" + using namespace lldb_private; using namespace lldb_private::dwarf; extern int g_verbose; @@ -810,12 +811,13 @@ lldb::offset_t DWARFDebugInfoEntry::GetFirstAttributeOffset() const { return GetOffset() + llvm::getULEB128Size(m_abbr_idx); } -const DWARFAbbreviationDeclaration * +const llvm::DWARFAbbreviationDeclaration * DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(const DWARFUnit *cu) const { if (!cu) return nullptr; - const DWARFAbbreviationDeclarationSet *abbrev_set = cu->GetAbbreviations(); + const llvm::DWARFAbbreviationDeclarationSet *abbrev_set = + cu->GetAbbreviations(); if (!abbrev_set) return nullptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index c2ea40065232e72..29db44a16bb1281 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -14,7 +14,6 @@ #include "DWARFAttribute.h" #include "DWARFBaseDIE.h" -#include "DWARFDebugAbbrev.h" #include "DWARFDebugRanges.h" #include #include diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 5e4d48ab285a9d6..5d939582a312e98 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -12,6 +12,10 @@ #include "DWARFUnit.h" #include "llvm/Support/Error.h" +namespace llvm { +class DWARFAbbreviationDeclarationSet; +} + class DWARFTypeUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {} @@ -27,7 +31,7 @@ class DWARFTypeUnit : public DWARFUnit { private: DWARFTypeUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, - const DWARFAbbreviationDeclarationSet &abbrevs, + const llvm::DWARFAbbreviationDeclarationSet &abbrevs, DIERef::Section section, bool is_dwo) : DWARFUnit(dwarf, uid, header, abbrevs, section, is_dwo) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 749ffcb094ecfd9..45e37b42f5e9566 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -13,6 +13,7 @@ #include "lldb/Utility/LLDBAssert.h" #include 
"lldb/Utility/StreamString.h" #include "lldb/Utility/Timer.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/Object/Error.h" @@ -32,7 +33,7 @@ extern int g_verbose; DWARFUnit::DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, - const DWARFAbbreviationDeclarationSet &abbrevs, + const llvm::DWARFAbbreviationDeclarationSet &abbrevs, DIERef::Section section, bool is_dwo) : UserID(uid), m_dwarf(dwarf), m_header(header), m_abbrevs(&abbrevs), m_cancel_scopes(false), m_section(section), m_is_dwo(is_dwo), @@ -435,7 +436,8 @@ size_t DWARFUnit::GetDebugInfoSize() const { return GetLengthByteSize() + GetLength() - GetHeaderByteSize(); } -const DWARFAbbreviationDeclarationSet *DWARFUnit::GetAbbreviations() const { +const llvm::DWARFAbbreviationDeclarationSet * +DWARFUnit::GetAbbreviations() const { return m_abbrevs; } @@ -973,7 +975,7 @@ DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid, if (!expected_header) return expected_header.takeError(); - const DWARFDebugAbbrev *abbr = dwarf.DebugAbbrev(); + const llvm::DWARFDebugAbbrev *abbr = dwarf.DebugAbbrev(); if (!abbr) return llvm::make_error( "No debug_abbrev data"); @@ -985,8 +987,12 @@ DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid, return llvm::make_error( "Abbreviation offset for unit is not valid"); - const DWARFAbbreviationDeclarationSet *abbrevs = - abbr->GetAbbreviationDeclarationSet(expected_header->GetAbbrOffset()); + llvm::Expected abbrevs_or_err = + abbr->getAbbreviationDeclarationSet(expected_header->GetAbbrOffset()); + if (!abbrevs_or_err) + return abbrevs_or_err.takeError(); + + const llvm::DWARFAbbreviationDeclarationSet *abbrevs = *abbrevs_or_err; if (!abbrevs) return llvm::make_error( "No abbrev exists at the specified offset."); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index bc55b093e894edd..004c01a37bb05e3 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -13,6 +13,7 @@ #include "DWARFDebugInfoEntry.h" #include "lldb/Utility/XcodeSDK.h" #include "lldb/lldb-enumerations.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" #include "llvm/Support/RWMutex.h" #include @@ -153,7 +154,7 @@ class DWARFUnit : public lldb_private::UserID { // Size of the CU data incl. header but without initial length. 
uint32_t GetLength() const { return m_header.GetLength(); } uint16_t GetVersion() const { return m_header.GetVersion(); } - const DWARFAbbreviationDeclarationSet *GetAbbreviations() const; + const llvm::DWARFAbbreviationDeclarationSet *GetAbbreviations() const; dw_offset_t GetAbbrevOffset() const; uint8_t GetAddressByteSize() const { return m_header.GetAddressByteSize(); } dw_addr_t GetAddrBase() const { return m_addr_base.value_or(0); } @@ -291,7 +292,7 @@ class DWARFUnit : public lldb_private::UserID { protected: DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, - const DWARFAbbreviationDeclarationSet &abbrevs, + const llvm::DWARFAbbreviationDeclarationSet &abbrevs, DIERef::Section section, bool is_dwo); llvm::Error ExtractHeader(SymbolFileDWARF &dwarf, @@ -323,7 +324,7 @@ class DWARFUnit : public lldb_private::UserID { SymbolFileDWARF &m_dwarf; std::shared_ptr m_dwo; DWARFUnitHeader m_header; - const DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr; + const llvm::DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr; void *m_user_data = nullptr; // The compile unit debug information entry item DWARFDebugInfoEntry::collection m_die_array; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index aae481e2ae74177..e472074545a6f07 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -58,7 +58,6 @@ #include "DWARFASTParser.h" #include "DWARFASTParserClang.h" #include "DWARFCompileUnit.h" -#include "DWARFDebugAbbrev.h" #include "DWARFDebugAranges.h" #include "DWARFDebugInfo.h" #include "DWARFDebugMacro.h" @@ -74,6 +73,7 @@ #include "SymbolFileDWARFDwo.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" @@ -511,7 +511,8 @@ bool SymbolFileDWARF::SupportedVersion(uint16_t version) { return version >= 2 && version <= 5; } -static std::set GetUnsupportedForms(DWARFDebugAbbrev *debug_abbrev) { +static std::set +GetUnsupportedForms(llvm::DWARFDebugAbbrev *debug_abbrev) { if (!debug_abbrev) return {}; @@ -553,7 +554,7 @@ uint32_t SymbolFileDWARF::CalculateAbilities() { if (section) debug_abbrev_file_size = section->GetFileSize(); - DWARFDebugAbbrev *abbrev = DebugAbbrev(); + llvm::DWARFDebugAbbrev *abbrev = DebugAbbrev(); std::set unsupported_forms = GetUnsupportedForms(abbrev); if (!unsupported_forms.empty()) { StreamString error; @@ -624,7 +625,7 @@ void SymbolFileDWARF::LoadSectionData(lldb::SectionType sect_type, m_objfile_sp->ReadSectionData(section_sp.get(), data); } -DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() { +llvm::DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() { if (m_abbr) return m_abbr.get(); @@ -632,7 +633,8 @@ DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() { if (debug_abbrev_data.GetByteSize() == 0) return nullptr; - auto abbr = std::make_unique(debug_abbrev_data); + auto abbr = + std::make_unique(debug_abbrev_data.GetAsLLVM()); llvm::Error error = abbr->parse(); if (error) { Log *log = GetLog(DWARFLog::DebugInfo); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 191a5abcf265abd..5aaf8bd270ef7b1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -41,7 +41,6 @@ // Forward Declarations for this 
DWARF plugin class DebugMapModule; class DWARFCompileUnit; -class DWARFDebugAbbrev; class DWARFDebugAranges; class DWARFDebugInfo; class DWARFDebugInfoEntry; @@ -55,6 +54,10 @@ class SymbolFileDWARFDwo; class SymbolFileDWARFDwp; class UserID; +namespace llvm { +class DWARFDebugAbbrev; +} + #define DIE_IS_BEING_PARSED ((lldb_private::Type *)1) class SymbolFileDWARF : public lldb_private::SymbolFileCommon { @@ -224,7 +227,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - DWARFDebugAbbrev *DebugAbbrev(); + llvm::DWARFDebugAbbrev *DebugAbbrev(); DWARFDebugInfo &DebugInfo(); @@ -536,7 +539,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { llvm::once_flag m_info_once_flag; std::unique_ptr m_info; - std::unique_ptr m_abbr; + std::unique_ptr m_abbr; std::unique_ptr m_global_aranges_up; typedef std::unordered_map From lldb-commits at lists.llvm.org Mon Oct 2 10:46:22 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Mon, 02 Oct 2023 10:46:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Replace lldb's DWARFDebugAbbrev implementation with llvm's (PR #67841) In-Reply-To: Message-ID: <651b01ee.630a0220.37c74.204a@mx.google.com> https://github.com/bulbazord closed https://github.com/llvm/llvm-project/pull/67841 From lldb-commits at lists.llvm.org Mon Oct 2 10:54:39 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Mon, 02 Oct 2023 10:54:39 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Export DWARF Parser symbols for external language plugins (PR #67851) In-Reply-To: Message-ID: <651b03df.170a0220.4107f.17af@mx.google.com> https://github.com/walter-erquinigo closed https://github.com/llvm/llvm-project/pull/67851 From lldb-commits at lists.llvm.org Mon Oct 2 10:57:15 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 10:57:15 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) Message-ID: https://github.com/jeffreytan81 created https://github.com/llvm/llvm-project/pull/68012 This patch implements the data formatters for LibStdC++ `std::variant`. 
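To make the rendering concrete, here is a minimal sketch of what the new formatters display, with the expected `frame variable` output shown as comments. The variable names are illustrative, and the exact rendering follows the expectations in the test included in the patch below.

```cpp
#include <cstdio>
#include <variant>

int main() {
  std::variant<int, double, char> v1;
  v1 = 12; // v1 now holds an int

  // With this patch, `frame variable v1` renders roughly as:
  //   v1 = Active Type = int {
  //     Value = 12
  //   }
  std::printf("%d\n", std::get<int>(v1));

  v1 = 2.0; // now holds a double; the summary becomes "Active Type = double"
  std::printf("%f\n", std::get<double>(v1));
  return 0;
}
```

A variant whose index equals the npos sentinel (for example, after a throwing `emplace`) is summarized as `No Value` instead.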
>From e7e47a211ebaaa0f6380810b6573fadde12ca02d Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 10:53:17 -0700 Subject: [PATCH] Implement data formatters for LibStdC++ std::variant --- lldb/examples/synthetic/gnu_libstdcpp.py | 89 +++++++++++++++++++ .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 18 +++- .../libstdcpp/variant/Makefile | 5 ++ .../TestDataFormatterLibStdcxxVariant.py | 72 +++++++++++++++ .../libstdcpp/variant/main.cpp | 79 ++++++++++++++++ 5 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 825b7f3787a010d..7462db744674682 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -892,3 +892,92 @@ def update(self): except: pass return False + + +def VariantSummaryProvider(valobj, dict): + raw_obj = valobj.GetNonSyntheticValue() + index_obj = raw_obj.GetChildMemberWithName("_M_index") + data_obj = raw_obj.GetChildMemberWithName("_M_u") + if not ( + index_obj + and index_obj.IsValid() + and data_obj + and data_obj.IsValid() + ): + return "" + + def get_variant_npos_value(index_byte_size): + if index_byte_size == 1: + return 0xFF + elif index_byte_size == 2: + return 0xFFFF + else: + return 0xFFFFFFFF + + npos_value = get_variant_npos_value(index_obj.GetByteSize()) + index = index_obj.GetValueAsUnsigned(0) + if index == npos_value: + return " No Value" + + active_type = data_obj.GetType().GetTemplateArgumentType(index) + return f" Active Type = {active_type.GetDisplayTypeName()} " + + +class VariantSynthProvider: + def __init__(self, valobj, dict): + self.raw_obj = valobj.GetNonSyntheticValue() + self.is_valid = False + self.index = None + self.data_obj = None + + def update(self): + try: + self.index = self.raw_obj.GetChildMemberWithName( + "_M_index" + ).GetValueAsSigned(-1) + self.is_valid = self.index != -1 + self.data_obj = self.raw_obj.GetChildMemberWithName("_M_u") + except: + self.is_valid = False + return False + + def has_children(self): + return True + + def num_children(self): + return 1 if self.is_valid else 0 + + def get_child_index(self, name): + return 0 + + def get_child_at_index(self, index): + if not self.is_valid: + return None + cur = 0 + node = self.data_obj + while cur < self.index: + node = node.GetChildMemberWithName("_M_rest") + cur += 1 + + # _M_storage's type depends on variant field's type "_Type". + # 1. if '_Type' is literal type: _Type _M_storage. + # 2. otherwise, __gnu_cxx::__aligned_membuf<_Type> _M_storage. + # + # For 2. we have to cast it to underlying template _Type. 
+ + value = node.GetChildMemberWithName("_M_first").GetChildMemberWithName( + "_M_storage" + ) + template_type = value.GetType().GetTemplateArgumentType(0) + + # Literal type will return None for GetTemplateArgumentType(0) + if ( + template_type + and "__gnu_cxx::__aligned_membuf" in value.GetType().GetDisplayTypeName() + and template_type.IsValid() + ): + value = value.Cast(template_type) + + if value.IsValid(): + return value.Clone("Value") + return None diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c1743a5e0a418dd..d8a30729b6d02e1 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -332,11 +332,11 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { // If we can't parse the incoming name, then just check that it contains path. if (m_parse_error) return m_full.GetStringRef().contains(path); - + llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), context, identifier); @@ -372,7 +372,7 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { return false; if (haystack.empty() || !isalnum(haystack.back())) return true; - + return false; } @@ -388,7 +388,7 @@ bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { return true; } -bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, +bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); @@ -1104,6 +1104,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + cpp_category_sp->AddTypeSynthetic( + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); @@ -1148,6 +1153,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP(new ScriptSummaryFormat( stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); + cpp_category_sp->AddTypeSummary( + "^std::variant<.+>$", eFormatterMatchRegex, + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile new file mode 100644 index 000000000000000..104f82809c7a35b --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBSTDCPP := 1 +CXXFLAGS_EXTRAS := -std=c++17 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py new file mode 
100644 index 000000000000000..88be87a5469e196 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. +""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) + + (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp", False) + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + self.assertEqual(3 + 4, 7) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = int {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v1_ref", + substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v_v1", + substrs=[ + "v_v1 = Active Type = std::variant {", + "Value = Active Type = int {", + "Value = 12", + "}", + "}", + ], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = double {", "Value = 2", "}"], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v2", + substrs=["v2 = Active Type = double {", "Value = 2", "}"], + ) + + self.expect( + "frame variable v3", + substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], + ) + + self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) + + self.expect( + "frame variable v_many_types_no_value", + substrs=["v_many_types_no_value = No Value"], + ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp new file mode 100644 index 000000000000000..545318f9358b673 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include + +struct S { + operator int() { throw 42; } +}; + +int main() { + bool has_variant = true; + + printf("%d\n", has_variant); // break here + + std::variant v1; + std::variant &v1_ref = v1; + std::variant v2; + std::variant v3; + std::variant> v_v1; + std::variant v_no_value; + // The next variant has many types, meaning the type index does not fit in + // a byte and must be `unsigned short` instead of `unsigned char` when + // using the unstable libc++ ABI. With stable libc++ ABI, the type index + // is always just `unsigned int`. 
+ std::variant< + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int> + v_many_types_no_value; + + v1 = 12; // v contains int + v_v1 = v1; + int i = std::get(v1); + printf("%d\n", i); // break here + + v2 = 2.0; + double d = std::get(v2); + printf("%f\n", d); + + v3 = 'A'; + char c = std::get(v3); + printf("%d\n", c); + + // Checking v1 above and here to make sure we done maintain the incorrect + // state when we change its value. + v1 = 2.0; + d = std::get(v1); + printf("%f\n", d); // break here + + try { + v_no_value.emplace<0>(S()); + } catch (...) { + } + + printf("%zu\n", v_no_value.index()); + + try { + v_many_types_no_value.emplace<0>(S()); + } catch (...) { + } + + printf("%zu\n", v_many_types_no_value.index()); + + return 0; // break here +} From lldb-commits at lists.llvm.org Mon Oct 2 10:59:57 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Mon, 02 Oct 2023 10:59:57 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) Message-ID: https://github.com/walter-erquinigo created https://github.com/llvm/llvm-project/pull/68013 LLDB has the cmake flag `LLDB_EXPORT_ALL_SYMBOLS` that exports the lldb, lldb_private namespaces, as well as other symbols like python and lua (see `third-party/llvm-project/lldb/source/API/liblldb-private.exports`). However, not all symbols in lldb fall into these categories and in order to get access to some symbols that live in plugin folders (like dwarf parsing symbols), it's useful to be able to specify a custom exports file giving more control to the developer using lldb as a library. This adds the new cmake flag `LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE` that is used when `LLDB_EXPORT_ALL_SYMBOLS` is enabled to specify that custom exports file. This is a follow up of https://github.com/llvm/llvm-project/pull/67851 >From 4abd9478422cdf471103ff01d7994d2e7ffc1500 Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Mon, 2 Oct 2023 13:56:00 -0400 Subject: [PATCH] [LLDB] Allow specifying a custom exports file LLDB has the cmake flag `LLDB_EXPORT_ALL_SYMBOLS` that exports the lldb, lldb_private namespaces, as well as other symbols like python and lua (see `third-party/llvm-project/lldb/source/API/liblldb-private.exports`). 
However, not all symbols in lldb fall into these categories and in order to get access to some symbols that live in plugin folders (like dwarf parsing symbols), it's useful to be able to specify a custom exports file giving more control to the developer using lldb as a library. This adds the new cmake flag `LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE` that is used when `LLDB_EXPORT_ALL_SYMBOLS` is enabled to specify that custom exports file. This is a follow up of https://github.com/llvm/llvm-project/pull/67851 --- lldb/cmake/modules/LLDBConfig.cmake | 3 +++ lldb/source/API/CMakeLists.txt | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 380016ce48015fa..264eed1ad82012f 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -125,6 +125,9 @@ endif() set(LLDB_EXPORT_ALL_SYMBOLS 0 CACHE BOOL "Causes lldb to export all symbols when building liblldb.") +set(LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE "" CACHE PATH + "When `LLDB_EXPORT_ALL_SYMBOLS` is enabled, this specifies the exports file to use when building liblldb.") + if ((NOT MSVC) OR MSVC12) add_definitions( -DHAVE_ROUND ) endif() diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..45e3b7a91034006 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -177,11 +177,15 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") # from working on some systems but limits the liblldb size. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb namespace") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb.exports) - else() + elseif (NOT LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE) # Don't use an explicit export. Instead, tell the linker to # export all symbols. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb and lldb_private namespaces") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb-private.exports) + else () + MESSAGE("-- Symbols (liblldb): exporting all symbols specified in the exports " + " file '${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}'") + add_llvm_symbol_exports(liblldb "${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}") endif() set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc") elseif (LLDB_EXPORT_ALL_SYMBOLS) From lldb-commits at lists.llvm.org Mon Oct 2 11:00:25 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Mon, 02 Oct 2023 11:00:25 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651b0539.630a0220.122e6.0a4c@mx.google.com> https://github.com/walter-erquinigo ready_for_review https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Mon Oct 2 11:01:44 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 11:01:44 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651b0588.170a0220.aabce.50a2@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes LLDB has the cmake flag `LLDB_EXPORT_ALL_SYMBOLS` that exports the lldb, lldb_private namespaces, as well as other symbols like python and lua (see `third-party/llvm-project/lldb/source/API/liblldb-private.exports`). However, not all symbols in lldb fall into these categories and in order to get access to some symbols that live in plugin folders (like dwarf parsing symbols), it's useful to be able to specify a custom exports file giving more control to the developer using lldb as a library. This adds the new cmake flag `LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE` that is used when `LLDB_EXPORT_ALL_SYMBOLS` is enabled to specify that custom exports file. This is a follow up of https://github.com/llvm/llvm-project/pull/67851 --- Full diff: https://github.com/llvm/llvm-project/pull/68013.diff 2 Files Affected: - (modified) lldb/cmake/modules/LLDBConfig.cmake (+3) - (modified) lldb/source/API/CMakeLists.txt (+5-1) ``````````diff diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 380016ce48015fa..264eed1ad82012f 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -125,6 +125,9 @@ endif() set(LLDB_EXPORT_ALL_SYMBOLS 0 CACHE BOOL "Causes lldb to export all symbols when building liblldb.") +set(LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE "" CACHE PATH + "When `LLDB_EXPORT_ALL_SYMBOLS` is enabled, this specifies the exports file to use when building liblldb.") + if ((NOT MSVC) OR MSVC12) add_definitions( -DHAVE_ROUND ) endif() diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..45e3b7a91034006 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -177,11 +177,15 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") # from working on some systems but limits the liblldb size. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb namespace") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb.exports) - else() + elseif (NOT LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE) # Don't use an explicit export. Instead, tell the linker to # export all symbols. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb and lldb_private namespaces") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb-private.exports) + else () + MESSAGE("-- Symbols (liblldb): exporting all symbols specified in the exports " + " file '${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}'") + add_llvm_symbol_exports(liblldb "${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}") endif() set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc") elseif (LLDB_EXPORT_ALL_SYMBOLS) ``````````
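For anyone who wants to try the new flag, here is a minimal sketch. The exports file is assumed to use the same one-glob-per-line format as the existing `lldb/source/API/liblldb-private.exports`; the contents below just mirror the default globs, and a real use case would append whatever extra symbol patterns the embedding application needs:

```
# my-lldb.exports (hypothetical)
lldb::*
lldb_private::*
```

Configuring a build against it would then look something like:

```bash
cmake -G Ninja \
  -DLLDB_EXPORT_ALL_SYMBOLS=ON \
  -DLLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE=/path/to/my-lldb.exports \
  ../llvm
```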
https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Mon Oct 2 11:08:01 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 11:08:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b0701.050a0220.2d2ef.49b9@mx.google.com> github-actions[bot] wrote: :warning: Python code formatter, darker found issues in your code. :warning:
You can test this locally with the following command: ``````````bash darker --check --diff -r 2db8540a71ef546087158fcbf38e3b1883c5df48..e7e47a211ebaaa0f6380810b6573fadde12ca02d lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py lldb/examples/synthetic/gnu_libstdcpp.py ``````````
View the diff from darker here. ``````````diff --- examples/synthetic/gnu_libstdcpp.py 2023-10-02 17:53:17.000000 +0000 +++ examples/synthetic/gnu_libstdcpp.py 2023-10-02 18:07:54.852096 +0000 @@ -896,16 +896,11 @@ def VariantSummaryProvider(valobj, dict): raw_obj = valobj.GetNonSyntheticValue() index_obj = raw_obj.GetChildMemberWithName("_M_index") data_obj = raw_obj.GetChildMemberWithName("_M_u") - if not ( - index_obj - and index_obj.IsValid() - and data_obj - and data_obj.IsValid() - ): + if not (index_obj and index_obj.IsValid() and data_obj and data_obj.IsValid()): return "" def get_variant_npos_value(index_byte_size): if index_byte_size == 1: return 0xFF ``````````
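For context on the hunk above: libstdc++ stores `std::variant_npos` ("no value") as the all-ones bit pattern of the `_M_index` member, so the sentinel the formatter compares against depends on the index width. A rough Python sketch of the helper shown in the diff — only the 1-byte case appears above; the wider values are extrapolations of the same convention:

```python
def get_variant_npos_value(index_byte_size):
    # variant_npos is the all-ones value of the index member, so the
    # sentinel is determined by how wide libstdc++ made _M_index.
    if index_byte_size == 1:
        return 0xFF  # unsigned char index (few alternatives)
    if index_byte_size == 2:
        return 0xFFFF  # unsigned short index (many alternatives)
    return 0xFFFFFFFF  # unsigned int index
```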
https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 11:08:03 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 11:08:03 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b0703.a70a0220.52474.69af@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff 2db8540a71ef546087158fcbf38e3b1883c5df48 e7e47a211ebaaa0f6380810b6573fadde12ca02d -- lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp ``````````
View the diff from clang-format here. ``````````diff diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index d8a30729b6d0..ad6d627938c0 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -336,10 +336,8 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success - = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), - context, - identifier); + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier( + path_str.c_str(), context, identifier); if (!success) return m_full.GetStringRef().contains(path); @@ -1105,10 +1103,10 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); cpp_category_sp->AddTypeSynthetic( - "^std::variant<.+>$", eFormatterMatchRegex, - SyntheticChildrenSP(new ScriptedSyntheticChildren( - stl_synth_flags, - "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); ``````````
https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 11:20:52 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Mon, 02 Oct 2023 11:20:52 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651b0a04.170a0220.370ee.197c@mx.google.com> junior-jl wrote: > You can use `lldb/test/Shell/Commands/command-thread-select.test` as an example to write your own test. Using this file as a template, I wrote `command-process-launch-user-entry.test` as shown below: ``` # RUN: %clang_host -g %S/Inputs/main.c -o %t # RUN: %lldb %t -s %s -o exit | FileCheck %s process launch -m # CHECK-LABEL: process launch -m # CHECK: Process {{.*}} stopped # CHECK: stop reason = one-shot breakpoint 1 # CHECK: frame #0: {{.*}}`main at main.c ``` I also wanted to add the check line `# CHECK: Process {{.*}} launched: '{{.*}}' {{.*}}`, but it fails because of the following: 1. When the command is executed from the `(lldb)` prompt, it correctly shows that the process is launched, then stopped. ``` $ ./bin/lldb ~/main.out (lldb) target create "/home/jose/main.out" Current executable set to '/home/jose/main.out' (x86_64). (lldb) process launch -m Process 63632 launched: '/home/jose/main.out' (x86_64) Process 63632 stopped * thread #1, name = 'main.out', stop reason = one-shot breakpoint 1 frame #0: 0x0000555555555140 main.out`main at main.c:2 1 int foo() { return 0; } -> 2 int main() { return foo(); } 3 (lldb) ``` 2. When I run it as shown below, the `Process launched` message is shown after the stop. ``` $ ./bin/lldb ~/main.out -o 'process launch -m' (lldb) target create "/home/jose/main.out" Current executable set to '/home/jose/main.out' (x86_64). (lldb) process launch -m Process 63846 stopped * thread #1, name = 'main.out', stop reason = one-shot breakpoint 1 frame #0: 0x0000555555555140 main.out`main at main.c:2 1 int foo() { return 0; } -> 2 int main() { return foo(); } 3 Process 63846 launched: '/home/jose/main.out' (x86_64) (lldb) ``` Is this behaviour expected? https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Mon Oct 2 11:43:43 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 11:43:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651b0f5f.170a0220.bf1f.8fa2@mx.google.com> =?utf-8?q?José?= L. Junior Message-ID: In-Reply-To: medismailben wrote: > > You can use `lldb/test/Shell/Commands/command-thread-select.test` as an example to write your own test. > > Using this file as a template, I wrote `command-process-launch-user-entry.test` as shown below: > > ``` > # RUN: %clang_host -g %S/Inputs/main.c -o %t > # RUN: %lldb %t -s %s -o exit | FileCheck %s > > process launch -m > # CHECK-LABEL: process launch -m > # CHECK: Process {{.*}} stopped > # CHECK: stop reason = one-shot breakpoint 1 > # CHECK: frame #0: {{.*}}`main at main.c > ``` > > I also wanted to add the check line `# CHECK: Process {{.*}} launched: '{{.*}}' {{.*}}`, but it fails because of the following: > > 1. When the command is executed from the `(lldb)` prompt, it correctly shows that the process is launched, then stopped. > > ``` > $ ./bin/lldb ~/main.out > (lldb) target create "/home/jose/main.out" > Current executable set to '/home/jose/main.out' (x86_64). 
> (lldb) process launch -m > Process 63632 launched: '/home/jose/main.out' (x86_64) > Process 63632 stopped > * thread #1, name = 'main.out', stop reason = one-shot breakpoint 1 > frame #0: 0x0000555555555140 main.out`main at main.c:2 > 1 int foo() { return 0; } > -> 2 int main() { return foo(); } > 3 > (lldb) > ``` > > 2. When I run it as shown below, the `Process launched` message is shown after the stop. > > ``` > $ ./bin/lldb ~/main.out -o 'process launch -m' > (lldb) target create "/home/jose/main.out" > Current executable set to '/home/jose/main.out' (x86_64). > (lldb) process launch -m > Process 63846 stopped > * thread #1, name = 'main.out', stop reason = one-shot breakpoint 1 > frame #0: 0x0000555555555140 main.out`main at main.c:2 > 1 int foo() { return 0; } > -> 2 int main() { return foo(); } > 3 > Process 63846 launched: '/home/jose/main.out' (x86_64) > (lldb) > ``` > > Is this behaviour expected? I can reproduce it: ``` $ ./bin/lldb /tmp/main -o "b main" -o "r" (lldb) target create "/tmp/main" Current executable set to '/tmp/main' (arm64). (lldb) b main Breakpoint 1: where = main`main + 12 at main.cpp:2:3, address = 0x0000000100003fa0 (lldb) r Process 53764 stopped * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 frame #0: 0x0000000100003fa0 main`main at main.cpp:2:3 1 int main() { -> 2 return 0; 3 } Process 53764 launched: '/tmp/main' (arm64) ``` By default the debugger runs in asynchronous mode, so the stop events can be handled in a nondeterministic way. However, I think this is confusing and we should do something about it (in a separate PR). Could you file a new issue describing this behavior and paste the link here? Your test looks fine to me, so we can move forward and merge your PR. I'll update it after we fix that other bug. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Mon Oct 2 11:46:03 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 11:46:03 -0700 (PDT) Subject: [Lldb-commits] [lldb] Improve omp offload profiler (PR #68016) In-Reply-To: Message-ID: <651b0feb.170a0220.ab631.e6d7@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff 0b07b06effe5fdf779b75bb5ac6cf15e477cb0be 7c550d3190614d7572bc2d63051d7ce56c8f73b6 -- llvm/lib/Support/TimeProfiler.cpp openmp/libomptarget/src/api.cpp openmp/libomptarget/src/interface.cpp openmp/libomptarget/src/omptarget.cpp openmp/libomptarget/src/private.h ``````````
View the diff from clang-format here. ``````````diff diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 4446583102a8..330a4d93378a 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -226,7 +226,7 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurNs / 1000 ); + J.attribute("dur", DurNs / 1000); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); @@ -261,10 +261,10 @@ struct llvm::TimeTraceProfiler { // Emit the absolute time when this TimeProfiler started. // This can be used to combine the profiling data from // multiple processes and preserve actual time intervals. - J.attribute("beginningOfTime", - time_point_cast(BeginningOfTime) - .time_since_epoch() - .count()/1000); + J.attribute("beginningOfTime", time_point_cast(BeginningOfTime) + .time_since_epoch() + .count() / + 1000); J.objectEnd(); } diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 5dd918808492..06de1f8f20b7 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,8 +50,8 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DeviceNum) - +";size="+std::to_string(Size)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) + + ";size=" + std::to_string(Size)); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -136,9 +136,9 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(Length)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(Length)); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -291,9 +291,9 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(Length)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(Length)); DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -379,10 +379,10 @@ EXTERN int omp_target_memcpy_rect_async( const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(ElementSize) - +";num_dims="+std::to_string(NumDims)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + 
std::to_string(ElementSize) + + ";num_dims=" + std::to_string(NumDims)); DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 99a7abc7e0bc..2c7ab7a49d0b 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -80,8 +80,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy", - "NumArgs="+ - std::to_string(ArgNum), Loc); + "NumArgs=" + std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -274,10 +273,11 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, assert(KernelArgs->ThreadLimit[0] == static_cast(ThreadLimit) && !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", - "NumTeams="+std::to_string(NumTeams)+ - ";NumArgs="+ - std::to_string(KernelArgs->NumArgs), Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "Runtime target exe", + "NumTeams=" + std::to_string(NumTeams) + + ";NumArgs=" + std::to_string(KernelArgs->NumArgs), + Loc); if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, @@ -305,7 +305,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - { //required to show syncronization + { // required to show syncronization TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); if (Rc == OFFLOAD_SUCCESS) Rc = AsyncInfo.synchronize(); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index b5a2dfc68569..630ded281e1a 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -574,10 +574,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev", - "Size="+ - std::to_string(ArgSizes[I])+ - "B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "HostToDev", "Size=" + std::to_string(ArgSizes[I]) + "B", Loc); if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -953,8 +951,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+ - std::to_string(DataSize)+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc); // Wait for any previous transfer if an event is present. 
if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1669,14 +1667,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", - "NumArguments="+ - std::to_string(KernelArgs.NumArgs)+ - ";NumTeams="+ - std::to_string(KernelArgs.NumTeams[0])+ - ";TripCount="+ - std::to_string(KernelArgs.Tripcount) - , Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "Kernel Target", + "NumArguments=" + std::to_string(KernelArgs.NumArgs) + + ";NumTeams=" + std::to_string(KernelArgs.NumTeams[0]) + + ";TripCount=" + std::to_string(KernelArgs.Tripcount), + Loc); #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index c8d07138b180..8657390dde17 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -435,7 +435,7 @@ public: std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) +// llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ ``````````
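For reviewers unfamiliar with this machinery: the `TIMESCOPE*` macros in the hunks above feed LLVM's time-trace profiler, which libomptarget exposes through the `LIBOMPTARGET_PROFILE` environment variable. A usage sketch — the compile flags, offload architecture, and file names here are illustrative only:

```bash
# Build an OpenMP offload program and capture a profile of one run.
clang -O2 -fopenmp --offload-arch=gfx90a saxpy.c -o saxpy
LIBOMPTARGET_PROFILE=saxpy.time-trace.json ./saxpy
# The resulting JSON can be opened in chrome://tracing or Perfetto to
# inspect the kernel-launch and data-transfer timeline this PR annotates.
```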
https://github.com/llvm/llvm-project/pull/68016 From lldb-commits at lists.llvm.org Mon Oct 2 12:14:44 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 12:14:44 -0700 (PDT) Subject: [Lldb-commits] [lldb] Improve omp offload profiler (PR #68016) In-Reply-To: Message-ID: <651b16a4.620a0220.f3093.59e2@mx.google.com> https://github.com/fel-cab updated https://github.com/llvm/llvm-project/pull/68016 >From dd44de067c26ba94b6561c5ed7fa4a5d812a3d1a Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Mon, 18 Sep 2023 12:07:12 +0000 Subject: [PATCH 1/9] testing Profiler features --- openmp/libomptarget/src/interface.cpp | 5 ++++- openmp/libomptarget/src/private.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 5f21b16b3fbfb1e..f64e1e268a3952e 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -252,7 +252,10 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - TIMESCOPE_WITH_IDENT(Loc); + //TIMESCOPE_WITH_IDENT(Loc); + TIMESCOPE(); + //TIMESCOPE_WITH_NAME_AND_IDENT("Hello", Loc); + //TIMESCOPE_WITH_RTM_AND_IDENT("Hello", Loc); DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index cbce15b63a3eba2..dc6cd3944233955 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -433,7 +433,8 @@ class ExponentialBackoff { SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) >From 92586bca6364100c7511ad38a30f41b0f86dea9c Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 19 Sep 2023 12:02:53 +0000 Subject: [PATCH 2/9] Improve Profiler 1 --- llvm/lib/Support/TimeProfiler.cpp | 2 +- openmp/libomptarget/src/interface.cpp | 17 +++++++++-------- openmp/libomptarget/src/omptarget.cpp | 10 +++++----- openmp/libomptarget/src/private.h | 5 +++-- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 4d625b3eb5b1709..e1458116f64ab47 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -227,7 +227,7 @@ struct llvm::TimeTraceProfiler { J.attribute("ph", "X"); J.attribute("ts", 0); J.attribute("dur", DurUs); - J.attribute("name", "Total " + Total.first); + J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); J.attribute("avg ms", int64_t(DurUs / Count / 1000)); diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index f64e1e268a3952e..b8892cbe689107f 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -33,14 +33,14 @@ using namespace llvm::omp::target::ompt; //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { - TIMESCOPE(); + //TIMESCOPE(); 
PM->RTLs.registerRequires(Flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { - TIMESCOPE(); + //TIMESCOPE(); if (PM->maybeDelayRegisterLib(Desc)) return; @@ -61,7 +61,7 @@ EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { - TIMESCOPE(); + //TIMESCOPE(); PM->RTLs.unregisterLib(Desc); for (auto &RTL : PM->RTLs.UsedRTLs) { if (RTL->unregister_lib) { @@ -82,7 +82,8 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, static_assert(std::is_convertible_v, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); - TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); + //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); + TIMESCOPE_WITH_RTM_AND_IDENT("targetData", Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -253,9 +254,9 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); //TIMESCOPE_WITH_IDENT(Loc); - TIMESCOPE(); + //TIMESCOPE(); //TIMESCOPE_WITH_NAME_AND_IDENT("Hello", Loc); - //TIMESCOPE_WITH_RTM_AND_IDENT("Hello", Loc); + //TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", @@ -411,7 +412,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, // Get the current number of components for a user-defined mapper. EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { - TIMESCOPE(); + //TIMESCOPE(); auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; int64_t Size = MapperComponentsPtr->Components.size(); DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", @@ -423,7 +424,7 @@ EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, void *Begin, int64_t Size, int64_t Type, void *Name) { - TIMESCOPE(); + //TIMESCOPE(); DP("__tgt_push_mapper_component(Handle=" DPxMOD ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s).\n", diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 40419e448942608..3754f63909dac9c 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -398,7 +398,7 @@ static int32_t getParentIndex(int64_t Type) { void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name) { - TIMESCOPE(); + //TIMESCOPE(); DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size); if (Size <= 0) { @@ -427,7 +427,7 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, const char *Name) { - TIMESCOPE(); + //TIMESCOPE(); DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum, DPxPTR(DevicePtr)); @@ -453,7 +453,7 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, const char *Name) { - TIMESCOPE(); + //TIMESCOPE(); DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size); if (Size <= 0) { @@ -493,7 
+493,7 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, } void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) { - TIMESCOPE(); + //TIMESCOPE(); DP("Call to %s for device %d unlocking\n", Name, DeviceNum); DeviceTy *DevicePtr = nullptr; @@ -572,7 +572,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { - TIMESCOPE_WITH_IDENT(Loc); + //TIMESCOPE_WITH_IDENT(Loc); // process each input. for (int32_t I = 0; I < ArgNum; ++I) { // Ignore private variables and arrays - there is no mapping for them. diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index dc6cd3944233955..b1ada09d64c7a55 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -433,8 +433,9 @@ class ExponentialBackoff { SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM) + #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) >From f9167dc8fef277ac1aa53e2e95bade3f0b727df1 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 19 Sep 2023 21:33:24 +0000 Subject: [PATCH 3/9] Changed profiling to work in nanoseconds. Made Profiling calls for runtime calls and different ones for kernel lunches and memory transfers. --- llvm/lib/Support/TimeProfiler.cpp | 28 +++++++++++++-------------- openmp/libomptarget/src/interface.cpp | 7 ++----- openmp/libomptarget/src/omptarget.cpp | 11 +++++++---- openmp/libomptarget/src/private.h | 6 +++--- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index e1458116f64ab47..64b3ef35be27c42 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -30,7 +30,7 @@ namespace { using std::chrono::duration; using std::chrono::duration_cast; -using std::chrono::microseconds; +using std::chrono::nanoseconds; using std::chrono::steady_clock; using std::chrono::system_clock; using std::chrono::time_point; @@ -80,14 +80,14 @@ struct TimeTraceProfilerEntry { // rather than casting duration. This avoids truncation issues causing inner // scopes overruning outer scopes. ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { - return (time_point_cast(Start) - - time_point_cast(StartTime)) + return (time_point_cast(Start) - + time_point_cast(StartTime)) .count(); } ClockType::rep getFlameGraphDurUs() const { - return (time_point_cast(End) - - time_point_cast(Start)) + return (time_point_cast(End) - + time_point_cast(Start)) .count(); } }; @@ -123,7 +123,7 @@ struct llvm::TimeTraceProfiler { DurationType Duration = E.End - E.Start; // Only include sections longer or equal to TimeTraceGranularity msec. 
- if (duration_cast(Duration).count() >= TimeTraceGranularity) + if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -169,8 +169,8 @@ struct llvm::TimeTraceProfiler { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); - J.attribute("ts", StartUs); - J.attribute("dur", DurUs); + J.attribute("ts", StartUs / 1000); + J.attribute("dur", DurUs / 1000); J.attribute("name", E.Name); if (!E.Detail.empty()) { J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); @@ -218,7 +218,7 @@ struct llvm::TimeTraceProfiler { // Report totals on separate threads of tracing file. uint64_t TotalTid = MaxTid + 1; for (const NameAndCountAndDurationType &Total : SortedTotals) { - auto DurUs = duration_cast(Total.second.second).count(); + auto DurUs = duration_cast(Total.second.second).count(); auto Count = AllCountAndTotalPerName[Total.first].first; J.object([&] { @@ -226,11 +226,11 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurUs); + J.attribute("dur", DurUs / 1000); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); - J.attribute("avg ms", int64_t(DurUs / Count / 1000)); + J.attribute("avg ms", int64_t(DurUs / Count / 1000 / 1000)); }); }); @@ -262,9 +262,9 @@ struct llvm::TimeTraceProfiler { // This can be used to combine the profiling data from // multiple processes and preserve actual time intervals. J.attribute("beginningOfTime", - time_point_cast(BeginningOfTime) + time_point_cast(BeginningOfTime) .time_since_epoch() - .count()); + .count()/1000); J.objectEnd(); } @@ -281,7 +281,7 @@ struct llvm::TimeTraceProfiler { SmallString<0> ThreadName; const uint64_t Tid; - // Minimum time granularity (in microseconds) + // Minimum time granularity (in nanoseconds) const unsigned TimeTraceGranularity; }; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index b8892cbe689107f..d4ee246f84449f1 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -83,7 +83,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); - TIMESCOPE_WITH_RTM_AND_IDENT("targetData", Loc); + TIMESCOPE_WITH_RTM_AND_IDENT("Runtime Data Copy", Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -253,10 +253,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - //TIMESCOPE_WITH_IDENT(Loc); - //TIMESCOPE(); - //TIMESCOPE_WITH_NAME_AND_IDENT("Hello", Loc); - //TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); + TIMESCOPE_WITH_NAME_AND_IDENT("Runtime target exe",Loc); DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 3754f63909dac9c..ad966e7e1c47544 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -863,6 +863,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t 
*ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { + //TIMESCOPE_WITH_NAME_AND_IDENT("targetDataEnd", Loc); int Ret = OFFLOAD_SUCCESS; auto *PostProcessingPtrs = new SmallVector(); // process each input. @@ -955,7 +956,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - + TIMESCOPE_WITH_NAME_AND_IDENT("DevToHost", Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1445,7 +1446,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, SmallVector &TgtOffsets, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); + //TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); @@ -1493,6 +1494,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, DPxPTR(HstPtrVal)); continue; } + TIMESCOPE_WITH_RTM_AND_IDENT("HostToDev", Loc); DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, @@ -1572,7 +1574,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, map_var_info_t *ArgNames, void **ArgMappers, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); + //TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; // Move data from device. 
@@ -1597,6 +1599,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, return Ret; }); + return OFFLOAD_SUCCESS; } } // namespace @@ -1672,7 +1675,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc); + TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index b1ada09d64c7a55..f0591cd17b0fd15 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -432,10 +432,10 @@ class ExponentialBackoff { #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ + std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM) - + llvm::TimeTraceScope TimeScope(ProfileName, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) >From c82ce52f244d218752fea2dcc1f347fc589cd016 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Thu, 21 Sep 2023 14:22:28 +0000 Subject: [PATCH 4/9] test with DevToHost --- openmp/libomptarget/src/omptarget.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index ad966e7e1c47544..e113942375ef9c6 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -956,7 +956,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_NAME_AND_IDENT("DevToHost", Loc); + std::string MessageDataSize = "DevToHost "+std::to_string(DataSize)+"B"; + TIMESCOPE_WITH_NAME_AND_IDENT(MessageDataSize, Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { >From 448f0e77b6c824de73cbd9ae34d4c59b02e7e441 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Fri, 22 Sep 2023 21:48:57 +0000 Subject: [PATCH 5/9] Fixing nanoseconds in totals, adding syncronize timings, and adding extra info in kernels and device --- llvm/lib/Support/TimeProfiler.cpp | 24 ++++++++++++------------ openmp/libomptarget/src/interface.cpp | 18 ++++++++++-------- openmp/libomptarget/src/omptarget.cpp | 19 +++++++++---------- openmp/libomptarget/src/private.h | 10 +++++++--- 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 64b3ef35be27c42..4446583102a8133 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -79,13 +79,13 @@ struct TimeTraceProfilerEntry { // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. This avoids truncation issues causing inner // scopes overruning outer scopes. 
- ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { + ClockType::rep getFlameGraphStartNs(TimePointType StartTime) const { return (time_point_cast(Start) - time_point_cast(StartTime)) .count(); } - ClockType::rep getFlameGraphDurUs() const { + ClockType::rep getFlameGraphDurNs() const { return (time_point_cast(End) - time_point_cast(Start)) .count(); @@ -114,9 +114,9 @@ struct llvm::TimeTraceProfiler { // Check that end times monotonically increase. assert((Entries.empty() || - (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= - Entries.back().getFlameGraphStartUs(StartTime) + - Entries.back().getFlameGraphDurUs())) && + (E.getFlameGraphStartNs(StartTime) + E.getFlameGraphDurNs() >= + Entries.back().getFlameGraphStartNs(StartTime) + + Entries.back().getFlameGraphDurNs())) && "TimeProfiler scope ended earlier than previous scope"); // Calculate duration at full precision for overall counts. @@ -162,15 +162,15 @@ struct llvm::TimeTraceProfiler { // Emit all events for the main flame graph. auto writeEvent = [&](const auto &E, uint64_t Tid) { - auto StartUs = E.getFlameGraphStartUs(StartTime); - auto DurUs = E.getFlameGraphDurUs(); + auto StartNs = E.getFlameGraphStartNs(StartTime); + auto DurNs = E.getFlameGraphDurNs(); J.object([&] { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); - J.attribute("ts", StartUs / 1000); - J.attribute("dur", DurUs / 1000); + J.attribute("ts", StartNs / 1000); + J.attribute("dur", DurNs / 1000); J.attribute("name", E.Name); if (!E.Detail.empty()) { J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); @@ -218,7 +218,7 @@ struct llvm::TimeTraceProfiler { // Report totals on separate threads of tracing file. uint64_t TotalTid = MaxTid + 1; for (const NameAndCountAndDurationType &Total : SortedTotals) { - auto DurUs = duration_cast(Total.second.second).count(); + auto DurNs = duration_cast(Total.second.second).count(); auto Count = AllCountAndTotalPerName[Total.first].first; J.object([&] { @@ -226,11 +226,11 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurUs / 1000); + J.attribute("dur", DurNs / 1000 ); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); - J.attribute("avg ms", int64_t(DurUs / Count / 1000 / 1000)); + J.attribute("avg us", int64_t(DurNs / Count / 1000)); }); }); diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index d4ee246f84449f1..bed9b1e40db455b 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -83,7 +83,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); - TIMESCOPE_WITH_RTM_AND_IDENT("Runtime Data Copy", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy","NumArgs="+std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -252,9 +252,6 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, KernelArgsTy *KernelArgs) { static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - - TIMESCOPE_WITH_NAME_AND_IDENT("Runtime target exe",Loc); - DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", DeviceId, 
DPxPTR(HostPtr)); @@ -279,7 +276,11 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, assert(KernelArgs->ThreadLimit[0] == static_cast(ThreadLimit) && !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); - + TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", + "NumTeams="+std::to_string(NumTeams)+ + ";NumArgs="+std::to_string(KernelArgs->NumArgs) + , Loc); + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, KernelArgs->ArgSizes, KernelArgs->ArgTypes, @@ -303,16 +304,17 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - + int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - + { + TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); if (Rc == OFFLOAD_SUCCESS) Rc = AsyncInfo.synchronize(); handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); - + } return OMP_TGT_SUCCESS; } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index e113942375ef9c6..5f6168b0bd2fca0 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -579,7 +579,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - + TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev","Size="+std::to_string(ArgSizes[I])+"B", Loc); if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -863,7 +863,6 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { - //TIMESCOPE_WITH_NAME_AND_IDENT("targetDataEnd", Loc); int Ret = OFFLOAD_SUCCESS; auto *PostProcessingPtrs = new SmallVector(); // process each input. @@ -956,8 +955,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - std::string MessageDataSize = "DevToHost "+std::to_string(DataSize)+"B"; - TIMESCOPE_WITH_NAME_AND_IDENT(MessageDataSize, Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+std::to_string(DataSize)+"B", Loc); // Wait for any previous transfer if an event is present. 
if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1447,7 +1445,6 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, SmallVector &TgtOffsets, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - //TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); @@ -1494,8 +1491,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); continue; - } - TIMESCOPE_WITH_RTM_AND_IDENT("HostToDev", Loc); + } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, @@ -1575,7 +1571,6 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, map_var_info_t *ArgNames, void **ArgMappers, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - //TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; // Move data from device. @@ -1676,8 +1671,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); - + TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", + "NumArguments="+std::to_string(KernelArgs.NumArgs) + +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) + +";TripCount="+std::to_string(KernelArgs.Tripcount) + , Loc); + #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && "Multi dimensional launch not supported yet."); diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index f0591cd17b0fd15..4bc1db79de3f2b7 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -432,14 +432,18 @@ class ExponentialBackoff { #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ - std::string ProfileName = SI.getName(); \ + std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(ProfileName, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) +#define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ + SourceInfo SI(IDENT); \ + std::string ProfileLocation = SI.getProfileLocation(); \ + llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + Details) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) #define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT) #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) - +#define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) #endif >From c8bb24e807324a6a42b50076e5a3d2159f1d6d74 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 26 Sep 2023 15:58:50 +0000 Subject: [PATCH 6/9] Some fixes to the profiler --- openmp/libomptarget/src/api.cpp | 7 +++++++ openmp/libomptarget/src/interface.cpp | 16 +++++----------- openmp/libomptarget/src/omptarget.cpp | 5 ----- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 
942df8fdb94d660..f628a64c5b69fa4 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,6 +50,7 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { + TIMESCOPE(); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -66,6 +67,7 @@ EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) { } EXTERN void omp_target_free(void *Ptr, int DeviceNum) { + TIMESCOPE(); return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -134,6 +136,11 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { TIMESCOPE(); + /*TIMESCOPE_WITH_DETAILS_AND_IDENT("omp_target_memcpy", + "NumArguments="+std::to_string(KernelArgs.NumArgs) + +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) + +";TripCount="+std::to_string(KernelArgs.Tripcount) + , __FUNCTION__);*/ DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index bed9b1e40db455b..61a340ccf8d1b10 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -33,14 +33,12 @@ using namespace llvm::omp::target::ompt; //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { - //TIMESCOPE(); PM->RTLs.registerRequires(Flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { - //TIMESCOPE(); if (PM->maybeDelayRegisterLib(Desc)) return; @@ -61,7 +59,6 @@ EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { - //TIMESCOPE(); PM->RTLs.unregisterLib(Desc); for (auto &RTL : PM->RTLs.UsedRTLs) { if (RTL->unregister_lib) { @@ -82,7 +79,6 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, static_assert(std::is_convertible_v, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); - //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy","NumArgs="+std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", @@ -307,13 +303,13 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - { + { //required to show syncronization TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); - if (Rc == OFFLOAD_SUCCESS) - Rc = AsyncInfo.synchronize(); + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); - handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); - assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); + assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); } return OMP_TGT_SUCCESS; } @@ -411,7 +407,6 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, // Get the current number of components for a user-defined mapper. 
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { - //TIMESCOPE(); auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; int64_t Size = MapperComponentsPtr->Components.size(); DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", @@ -423,7 +418,6 @@ EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, void *Begin, int64_t Size, int64_t Type, void *Name) { - //TIMESCOPE(); DP("__tgt_push_mapper_component(Handle=" DPxMOD ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s).\n", diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 5f6168b0bd2fca0..450f34894fb56b4 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -398,7 +398,6 @@ static int32_t getParentIndex(int64_t Type) { void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size); if (Size <= 0) { @@ -427,7 +426,6 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum, DPxPTR(DevicePtr)); @@ -453,7 +451,6 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size); if (Size <= 0) { @@ -493,7 +490,6 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, } void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d unlocking\n", Name, DeviceNum); DeviceTy *DevicePtr = nullptr; @@ -572,7 +568,6 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { - //TIMESCOPE_WITH_IDENT(Loc); // process each input. for (int32_t I = 0; I < ArgNum; ++I) { // Ignore private variables and arrays - there is no mapping for them. 
>From da71cf17918c56e6a64c1e966dbb5d0dd79d0ed9 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 26 Sep 2023 21:06:06 +0000 Subject: [PATCH 7/9] Adding information to some omp api calls --- openmp/libomptarget/src/api.cpp | 22 ++++++++++++---------- openmp/libomptarget/src/private.h | 7 +++++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index f628a64c5b69fa4..5dd918808492997 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,7 +50,8 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { - TIMESCOPE(); + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DeviceNum) + +";size="+std::to_string(Size)); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -135,12 +136,9 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { - TIMESCOPE(); - /*TIMESCOPE_WITH_DETAILS_AND_IDENT("omp_target_memcpy", - "NumArguments="+std::to_string(KernelArgs.NumArgs) - +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) - +";TripCount="+std::to_string(KernelArgs.Tripcount) - , __FUNCTION__);*/ + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) + +";src_dev="+std::to_string(SrcDevice) + +";size="+std::to_string(Length)); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -293,7 +291,9 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE(); + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) + +";src_dev="+std::to_string(SrcDevice) + +";size="+std::to_string(Length)); DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -321,7 +321,6 @@ omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice) { - TIMESCOPE(); DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " @@ -380,7 +379,10 @@ EXTERN int omp_target_memcpy_rect_async( const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE(); + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) + +";src_dev="+std::to_string(SrcDevice) + +";size="+std::to_string(ElementSize) + +";num_dims="+std::to_string(NumDims)); DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index 4bc1db79de3f2b7..c8d07138b180d17 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -434,16 +434,19 @@ class 
ExponentialBackoff { std::string ProfileLocation = SI.getProfileLocation(); \ std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + Details) +#define TIMESCOPE_WITH_DETAILS(Details) \ + llvm::TimeTraceScope TimeScope(__FUNCTION__, Details) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) #define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT) #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) +#define TIMESCOPE_WITH_DETAILS(Details) #endif >From f273bbcc66f361fe9cc03d8597ee886122b5e235 Mon Sep 17 00:00:00 2001 From: fel-cab Date: Mon, 2 Oct 2023 12:26:51 +0000 Subject: [PATCH 8/9] Adding information to the LIBOMPTARGET profiler runtime kernel and API calls. --- openmp/libomptarget/src/interface.cpp | 14 ++++++++------ openmp/libomptarget/src/omptarget.cpp | 24 +++++++++++++++--------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 61a340ccf8d1b10..99a7abc7e0bcee9 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -79,7 +79,9 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, static_assert(std::is_convertible_v, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy","NumArgs="+std::to_string(ArgNum), Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy", + "NumArgs="+ + std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -273,10 +275,10 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", - "NumTeams="+std::to_string(NumTeams)+ - ";NumArgs="+std::to_string(KernelArgs->NumArgs) - , Loc); - + "NumTeams="+std::to_string(NumTeams)+ + ";NumArgs="+ + std::to_string(KernelArgs->NumArgs), Loc); + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, KernelArgs->ArgSizes, KernelArgs->ArgTypes, @@ -300,7 +302,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - + int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); { //required to show syncronization diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 450f34894fb56b4..b5a2dfc68569081 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -574,7 +574,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & 
OMP_TGT_MAPTYPE_PRIVATE)) continue; - TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev","Size="+std::to_string(ArgSizes[I])+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev", + "Size="+ + std::to_string(ArgSizes[I])+ + "B", Loc); if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -950,7 +953,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+std::to_string(DataSize)+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+ + std::to_string(DataSize)+"B", Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1486,7 +1490,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); continue; - } + } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, @@ -1590,7 +1594,6 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, return Ret; }); - return OFFLOAD_SUCCESS; } } // namespace @@ -1667,11 +1670,14 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", - "NumArguments="+std::to_string(KernelArgs.NumArgs) - +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) - +";TripCount="+std::to_string(KernelArgs.Tripcount) - , Loc); - + "NumArguments="+ + std::to_string(KernelArgs.NumArgs)+ + ";NumTeams="+ + std::to_string(KernelArgs.NumTeams[0])+ + ";TripCount="+ + std::to_string(KernelArgs.Tripcount) + , Loc); + #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && "Multi dimensional launch not supported yet."); >From 08dbdd5ba1c0502b6d1c935bac6cc14acd4f04be Mon Sep 17 00:00:00 2001 From: fel-cab Date: Mon, 2 Oct 2023 19:14:01 +0000 Subject: [PATCH 9/9] Fixing format --- llvm/lib/Support/TimeProfiler.cpp | 10 +++---- openmp/libomptarget/src/api.cpp | 24 ++++++++--------- openmp/libomptarget/src/interface.cpp | 14 +++++----- openmp/libomptarget/src/omptarget.cpp | 39 ++++++++++++--------------- openmp/libomptarget/src/private.h | 2 +- 5 files changed, 42 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 4446583102a8133..330a4d93378affe 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -226,7 +226,7 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurNs / 1000 ); + J.attribute("dur", DurNs / 1000); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); @@ -261,10 +261,10 @@ struct llvm::TimeTraceProfiler { // Emit the absolute time when this TimeProfiler started. // This can be used to combine the profiling data from // multiple processes and preserve actual time intervals. 
- J.attribute("beginningOfTime", - time_point_cast(BeginningOfTime) - .time_since_epoch() - .count()/1000); + J.attribute("beginningOfTime", time_point_cast(BeginningOfTime) + .time_since_epoch() + .count() / + 1000); J.objectEnd(); } diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 5dd918808492997..06de1f8f20b7ae2 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,8 +50,8 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DeviceNum) - +";size="+std::to_string(Size)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) + + ";size=" + std::to_string(Size)); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -136,9 +136,9 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(Length)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(Length)); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -291,9 +291,9 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(Length)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(Length)); DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -379,10 +379,10 @@ EXTERN int omp_target_memcpy_rect_async( const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(ElementSize) - +";num_dims="+std::to_string(NumDims)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(ElementSize) + + ";num_dims=" + std::to_string(NumDims)); DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 99a7abc7e0bcee9..2c7ab7a49d0bfb0 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -80,8 +80,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy", - "NumArgs="+ - std::to_string(ArgNum), Loc); + "NumArgs=" + std::to_string(ArgNum), Loc); 
DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -274,10 +273,11 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, assert(KernelArgs->ThreadLimit[0] == static_cast(ThreadLimit) && !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", - "NumTeams="+std::to_string(NumTeams)+ - ";NumArgs="+ - std::to_string(KernelArgs->NumArgs), Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "Runtime target exe", + "NumTeams=" + std::to_string(NumTeams) + + ";NumArgs=" + std::to_string(KernelArgs->NumArgs), + Loc); if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, @@ -305,7 +305,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - { //required to show syncronization + { // required to show syncronization TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); if (Rc == OFFLOAD_SUCCESS) Rc = AsyncInfo.synchronize(); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index b5a2dfc68569081..277f95d7efa8201 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -574,10 +574,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev", - "Size="+ - std::to_string(ArgSizes[I])+ - "B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "HostToDev", "Size=" + std::to_string(ArgSizes[I]) + "B", Loc); if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -825,14 +823,13 @@ postProcessingTargetDataEnd(DeviceTy *Device, // remaining shadow pointer entries for this struct. const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM; if (HasFrom) { - Entry->foreachShadowPointerInfo( - [&](const ShadowPtrInfoTy &ShadowPtr) { - *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); - return OFFLOAD_SUCCESS; - }); + Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) { + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); } // Give up the lock as we either don't need it anymore (e.g., done with @@ -953,8 +950,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+ - std::to_string(DataSize)+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc); // Wait for any previous transfer if an event is present. 
if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1669,14 +1666,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", - "NumArguments="+ - std::to_string(KernelArgs.NumArgs)+ - ";NumTeams="+ - std::to_string(KernelArgs.NumTeams[0])+ - ";TripCount="+ - std::to_string(KernelArgs.Tripcount) - , Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "Kernel Target", + "NumArguments=" + std::to_string(KernelArgs.NumArgs) + + ";NumTeams=" + std::to_string(KernelArgs.NumTeams[0]) + + ";TripCount=" + std::to_string(KernelArgs.Tripcount), + Loc); #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index c8d07138b180d17..8657390dde17dc1 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -435,7 +435,7 @@ class ExponentialBackoff { std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) +// llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ From lldb-commits at lists.llvm.org Mon Oct 2 13:25:38 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 13:25:38 -0700 (PDT) Subject: [Lldb-commits] [lldb] [ELF] Handle relocations in synthetic .eh_frame with a non-zero offset within the output section (PR #65966) In-Reply-To: Message-ID: <651b2742.170a0220.a48b.f774@mx.google.com> https://github.com/simpal01 updated https://github.com/llvm/llvm-project/pull/65966 >From 43c156c679951cc3d827fdb7604e30aab658fd9a Mon Sep 17 00:00:00 2001 From: Simi Pallipurath Date: Mon, 11 Sep 2023 14:42:27 +0100 Subject: [PATCH 1/3] [LLD][AARCH64] lld incorrectly handles .eh_frame when it has a non-zero offset within its output section. When the .eh_frame section is placed at a non-zero offset within its output section, the relocation values within .eh_frame are computed incorrectly. We had a similar issue in AArch32, and it has already been fixed in https://reviews.llvm.org/D148033. While applying a relocation using S + A - P, the value of P (the location of the relocation) is computed incorrectly. P is: P = SecAddr + rel.offset, but SecAddr points to the starting address of the output section rather than the starting address of the .eh_frame section within that output section.
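A worked example with assumed numbers (illustrative only, not taken from the tests in this patch): suppose the .eh_frame output section is assigned address 0x1000, alignment padding places the synthetic .eh_frame data at outSecOff = 0x80, a pc-relative relocation sits at offset 0x18 within that data, and the target symbol is S = 0x900 with addend A = 0. The correct location is P = 0x1000 + 0x80 + 0x18 = 0x1098, so the linker must store S + A - P = 0x900 - 0x1098 = -0x798. Without the outSecOff adjustment, P is computed as 0x1000 + 0x18 = 0x1018 and the stored value becomes -0x718, which the unwinder later resolves to 0x1098 + (-0x718) = 0x980, that is, 0x80 bytes past the intended target.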
--- lld/ELF/Arch/AArch64.cpp | 3 ++ lld/test/ELF/eh-frame-nonzero-offset.s | 55 ++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 lld/test/ELF/eh-frame-nonzero-offset.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 174a0a3624f7765..09477141c777948 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -770,6 +770,9 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast(&sec)) secAddr += s->outSecOff; + else if (auto *eh = dyn_cast(&sec)) + if (InputSection *isec = eh->getParent()) + secAddr += isec->outSecOff; AArch64Relaxer relaxer(sec.relocs()); for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) { const Relocation &rel = sec.relocs()[i]; diff --git a/lld/test/ELF/eh-frame-nonzero-offset.s b/lld/test/ELF/eh-frame-nonzero-offset.s new file mode 100644 index 000000000000000..ef086fcf670d81b --- /dev/null +++ b/lld/test/ELF/eh-frame-nonzero-offset.s @@ -0,0 +1,55 @@ +// REQUIRES: aarch64 +// RUN: rm -rf %t && split-file %s %t + +// RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o +// RUN: ld.lld %t/a.o -T %t/eh-frame-non-zero-offset.t -o %t/non-zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/non-zero | FileCheck --check-prefix=NONZERO %s +// RUN: ld.lld %t/a.o -T %t/eh-frame-zero-offset.t -o %t/zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/zero | FileCheck --check-prefix=ZERO %s + +// NONZERO: {{[0-9]+}}: 0000000000000080 {{.*}} __eh_frame_start +// NONZERO-NEXT: {{[0-9]+}}: 00000000000000ac {{.*}} __eh_frame_end + +// NONZERO: 0x00000078 00000000 00000000 10000000 00000000 +// NONZERO-NEXT: 0x00000088 017a5200 017c1e01 1b0c1f00 10000000 +// NONZERO-NEXT: 0x00000098 18000000 64ffffff 08000000 00000000 +// NONZERO-NEXT: 0x000000a8 00000000 + +// ZERO: {{[0-9]+}}: 0000000000000080 {{.*}} __eh_frame_start +// ZERO-NEXT: {{[0-9]+}}: 00000000000000ac {{.*}} __eh_frame_end + +// ZERO: 0x00000080 10000000 00000000 017a5200 017c1e01 +// ZERO-NEXT: 0x00000090 1b0c1f00 10000000 18000000 64ffffff +// ZERO-NEXT: 0x000000a0 08000000 00000000 00000000 + +//--- eh-frame-non-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : { + /* Alignment padding within .eh_frame */ + . = ALIGN(128); + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- eh-frame-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : ALIGN(128) { + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- a.s +.section .text.01, "ax",%progbits +.global f1 +.type f1, %function +f1: +.cfi_startproc + nop + nop +.cfi_endproc >From bf78a2ee26fac6235f9cb32ced10ae7c92f4b579 Mon Sep 17 00:00:00 2001 From: Simi Pallipurath Date: Mon, 18 Sep 2023 19:50:56 +0100 Subject: [PATCH 2/3] fixup! [LLD][AARCH64] lld incorrectly handles .eh_frame when it has a non-zero offset within its output section. 
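This fixup replicates the same adjustment in the PPC64, X86_64, and generic TargetInfo paths. The shared pattern, sketched here for reference, is based on the hunks that follow; the dyn_cast template arguments are inferred from the variable names s and ehIn and are therefore an assumption, and the snippet uses LLD-internal types, so it is an illustration rather than a standalone compilable unit.

// Pattern each relocateAlloc now shares (sketch; InputSection,
// EhInputSection, and outSecOff are LLD-internal names).
uint64_t secAddr = sec.getOutputSection()->addr;
if (auto *s = dyn_cast<InputSection>(&sec))
  secAddr += s->outSecOff; // regular input section: add its own offset
else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
  // Synthetic .eh_frame piece: add the parent EhFrameSection's offset,
  // which accounts for any alignment padding placed before the data.
  secAddr += ehIn->getParent()->outSecOff;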
--- lld/ELF/Arch/AArch64.cpp | 7 ++- lld/ELF/Arch/PPC64.cpp | 4 ++ lld/ELF/Arch/X86_64.cpp | 4 ++ lld/ELF/SyntheticSections.cpp | 7 ++- lld/ELF/Target.cpp | 4 ++ ...et.s => eh-frame-nonzero-offset-aarch64.s} | 0 lld/test/ELF/eh-frame-nonzero-offset-arm.s | 55 +++++++++++++++++++ lld/test/ELF/eh-frame-nonzero-offset-ppc.s | 54 ++++++++++++++++++ lld/test/ELF/eh-frame-nonzero-offset-x86.s | 54 ++++++++++++++++++ 9 files changed, 185 insertions(+), 4 deletions(-) rename lld/test/ELF/{eh-frame-nonzero-offset.s => eh-frame-nonzero-offset-aarch64.s} (100%) create mode 100644 lld/test/ELF/eh-frame-nonzero-offset-arm.s create mode 100644 lld/test/ELF/eh-frame-nonzero-offset-ppc.s create mode 100644 lld/test/ELF/eh-frame-nonzero-offset-x86.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 09477141c777948..e7b98419d382583 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -770,9 +770,10 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast(&sec)) secAddr += s->outSecOff; - else if (auto *eh = dyn_cast(&sec)) - if (InputSection *isec = eh->getParent()) - secAddr += isec->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) { + SyntheticSection *ehFrame = ehIn->getParent(); + secAddr += ehFrame->outSecOff; + } AArch64Relaxer relaxer(sec.relocs()); for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) { const Relocation &rel = sec.relocs()[i]; diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 969d9326a7fc962..6f2d4d8e46c5535 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -1563,6 +1563,10 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) { + SyntheticSection *ehFrame = ehIn->getParent(); + secAddr += ehFrame->outSecOff; + } uint64_t lastPPCRelaxedRelocOff = -1; for (const Relocation &rel : sec.relocs()) { uint8_t *loc = buf + rel.offset; diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 349ccd218a579e4..1fd9dd4f21944ba 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -989,6 +989,10 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) { + SyntheticSection *ehFrame = ehIn->getParent(); + secAddr += ehFrame->outSecOff; + } for (const Relocation &rel : sec.relocs()) { if (rel.expr == R_NONE) // See deleteFallThruJmpInsn continue; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index f412efa36480284..416ebdc266eaa82 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -583,9 +583,14 @@ static uint64_t readFdeAddr(uint8_t *buf, int size) { uint64_t EhFrameSection::getFdePc(uint8_t *buf, size_t fdeOff, uint8_t enc) const { // The starting address to which this FDE applies is - // stored at FDE + 8 byte. + // stored at FDE + 8 byte. And this offset is within + // the .eh_frame section. size_t off = fdeOff + 8; uint64_t addr = readFdeAddr(buf + off, enc & 0xf); + // Adding outSecOff as finalizeAddressDependentContent() + // may have altered the corresponding outSecOff. This is + // required to get the correct PC relative offset. 
+ off = off + outSecOff; if ((enc & 0x70) == DW_EH_PE_absptr) return addr; if ((enc & 0x70) == DW_EH_PE_pcrel) diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 32bb2164a208b85..84f7b4844c2a34c 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -159,6 +159,10 @@ void TargetInfo::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) { + SyntheticSection *ehFrame = ehIn->getParent(); + secAddr += ehFrame->outSecOff; + } for (const Relocation &rel : sec.relocs()) { uint8_t *loc = buf + rel.offset; const uint64_t val = SignExtend64( diff --git a/lld/test/ELF/eh-frame-nonzero-offset.s b/lld/test/ELF/eh-frame-nonzero-offset-aarch64.s similarity index 100% rename from lld/test/ELF/eh-frame-nonzero-offset.s rename to lld/test/ELF/eh-frame-nonzero-offset-aarch64.s diff --git a/lld/test/ELF/eh-frame-nonzero-offset-arm.s b/lld/test/ELF/eh-frame-nonzero-offset-arm.s new file mode 100644 index 000000000000000..2461c3c585edfd3 --- /dev/null +++ b/lld/test/ELF/eh-frame-nonzero-offset-arm.s @@ -0,0 +1,55 @@ +// REQUIRES: arm +// RUN: rm -rf %t && split-file %s %t + +// RUN: llvm-mc -filetype=obj -triple=arm %t/a.s -o %t/a.o +// RUN: ld.lld %t/a.o -T %t/eh-frame-non-zero-offset.t -o %t/non-zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/non-zero | FileCheck --check-prefix=NONZERO %s +// RUN: ld.lld %t/a.o -T %t/eh-frame-zero-offset.t -o %t/zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/zero | FileCheck --check-prefix=ZERO %s + +// NONZERO: {{[0-9]+}}: 00000080 {{.*}} __eh_frame_start +// NONZERO-NEXT: {{[0-9]+}}: 000000ac {{.*}} __eh_frame_end + +// NONZERO: 0x00000074 00000000 00000000 00000000 10000000 +// NONZERO-NEXT: 0x00000084 00000000 017a5200 017c0e01 1b0c0d00 +// NONZERO-NEXT: 0x00000094 10000000 18000000 64ffffff 04000000 +// NONZERO-NEXT: 0x000000a4 00000000 00000000 + +// ZERO: {{[0-9]+}}: 00000080 {{.*}} __eh_frame_start +// ZERO-NEXT: {{[0-9]+}}: 000000ac {{.*}} __eh_frame_end + +// ZERO: 0x00000080 10000000 00000000 017a5200 017c0e01 +// ZERO-NEXT: 0x00000090 1b0c0d00 10000000 18000000 64ffffff +// ZERO-NEXT: 0x000000a0 04000000 00000000 00000000 + +//--- eh-frame-non-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : { + /* Alignment padding within .eh_frame */ + . 
= ALIGN(128); + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- eh-frame-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : ALIGN(128) { + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- a.s +.section .text.01, "ax",%progbits +.global f1 +.type f1, %function +f1: +.cfi_startproc +.cfi_sections .eh_frame +.space 4 +.cfi_endproc diff --git a/lld/test/ELF/eh-frame-nonzero-offset-ppc.s b/lld/test/ELF/eh-frame-nonzero-offset-ppc.s new file mode 100644 index 000000000000000..2b736ab383ffeca --- /dev/null +++ b/lld/test/ELF/eh-frame-nonzero-offset-ppc.s @@ -0,0 +1,54 @@ +// REQUIRES: ppc +// RUN: rm -rf %t && split-file %s %t + +// RUN: llvm-mc -filetype=obj -triple=ppc64le %t/a.s -o %t/a.o +// RUN: ld.lld %t/a.o -T %t/eh-frame-non-zero-offset.t -o %t/non-zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/non-zero | FileCheck --check-prefix=NONZERO %s +// RUN: ld.lld %t/a.o -T %t/eh-frame-zero-offset.t -o %t/zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/zero | FileCheck --check-prefix=ZERO %s + +// NONZERO: {{[0-9]+}}: 0000000000000080 {{.*}} __eh_frame_start +// NONZERO-NEXT: {{[0-9]+}}: 00000000000000ac {{.*}} __eh_frame_end + +// NONZERO: 0x00000078 00000000 00000000 10000000 00000000 +// NONZERO-NEXT: 0x00000088 017a5200 04784101 1b0c0100 10000000 +// NONZERO-NEXT: 0x00000098 18000000 64ffffff 04000000 00000000 +// NONZERO-NEXT: 0x000000a8 00000000 + +// ZERO: {{[0-9]+}}: 0000000000000080 {{.*}} __eh_frame_start +// ZERO-NEXT: {{[0-9]+}}: 00000000000000ac {{.*}} __eh_frame_end + +// ZERO: 0x00000080 10000000 00000000 017a5200 04784101 +// ZERO-NEXT: 0x00000090 1b0c0100 10000000 18000000 64ffffff +// ZERO-NEXT: 0x000000a0 04000000 00000000 00000000 + +//--- eh-frame-non-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : { + /* Alignment padding within .eh_frame */ + . 
= ALIGN(128); + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- eh-frame-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : ALIGN(128) { + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- a.s +.section .text.01, "ax",%progbits +.global f1 +.type f1, %function +f1: +.cfi_startproc +.space 4 +.cfi_endproc diff --git a/lld/test/ELF/eh-frame-nonzero-offset-x86.s b/lld/test/ELF/eh-frame-nonzero-offset-x86.s new file mode 100644 index 000000000000000..d5537346c338e08 --- /dev/null +++ b/lld/test/ELF/eh-frame-nonzero-offset-x86.s @@ -0,0 +1,54 @@ +// REQUIRES: x86 +// RUN: rm -rf %t && split-file %s %t + +// RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o +// RUN: ld.lld %t/a.o -T %t/eh-frame-non-zero-offset.t -o %t/non-zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/non-zero | FileCheck --check-prefix=NONZERO %s +// RUN: ld.lld %t/a.o -T %t/eh-frame-zero-offset.t -o %t/zero +// RUN: llvm-readelf --program-headers --unwind --symbols -x .eh_frame %t/zero | FileCheck --check-prefix=ZERO %s + +// NONZERO: {{[0-9]+}}: 0000000000000080 {{.*}} __eh_frame_start +// NONZERO-NEXT: {{[0-9]+}}: 00000000000000b4 {{.*}} __eh_frame_end + +// NONZERO: 0x00000078 00000000 00000000 14000000 00000000 +// NONZERO-NEXT: 0x00000088 017a5200 01781001 1b0c0708 90010000 +// NONZERO-NEXT: 0x00000098 14000000 1c000000 60ffffff 04000000 +// NONZERO-NEXT: 0x000000a8 00000000 00000000 00000000 + +// ZERO: {{[0-9]+}}: 0000000000000080 {{.*}} __eh_frame_start +// ZERO-NEXT: {{[0-9]+}}: 00000000000000b4 {{.*}} __eh_frame_end + +// ZERO: 0x00000080 14000000 00000000 017a5200 01781001 +// ZERO-NEXT: 0x00000090 1b0c0708 90010000 14000000 1c000000 +// ZERO-NEXT: 0x000000a0 60ffffff 04000000 00000000 00000000 +// ZERO-NEXT: 0x000000b0 00000000 + +//--- eh-frame-non-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : { + /* Alignment padding within .eh_frame */ + . = ALIGN(128); + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- eh-frame-zero-offset.t +SECTIONS { + .text : { *(.text .text.*) } + .eh_frame : ALIGN(128) { + __eh_frame_start = .; + *(.eh_frame .eh_frame.*) ; + __eh_frame_end = .; + } +} + +//--- a.s +.section .text +.globl f1 +f1: +.cfi_startproc +.space 4 +.cfi_endproc >From a6e2b6c89af6c2a9fd813686e2a48a19111851ae Mon Sep 17 00:00:00 2001 From: Simi Pallipurath Date: Sat, 30 Sep 2023 17:13:21 +0100 Subject: [PATCH 3/3] fixup! [LLD] lld incorrectly handles .eh_frame when it has a non-zero offset within its output section. Adding outSecOff in the eh_frame_hdr section, as finalizeAddressDependentContent() may have altered the corresponding outSecOff. This is required to get the correct initial location of each FDE entry. --- lld/ELF/SyntheticSections.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 416ebdc266eaa82..0f7ebf9d7ba840b 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -587,14 +587,10 @@ uint64_t EhFrameSection::getFdePc(uint8_t *buf, size_t fdeOff, // the .eh_frame section. size_t off = fdeOff + 8; uint64_t addr = readFdeAddr(buf + off, enc & 0xf); - // Adding outSecOff as finalizeAddressDependentContent() - // may have altered the corresponding outSecOff. This is - // required to get the correct PC relative offset.
- off = off + outSecOff; if ((enc & 0x70) == DW_EH_PE_absptr) return addr; if ((enc & 0x70) == DW_EH_PE_pcrel) - return addr + getParent()->addr + off; + return addr + getParent()->addr + off + outSecOff; fatal("unknown FDE size relative encoding"); } From lldb-commits at lists.llvm.org Mon Oct 2 13:45:22 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Mon, 02 Oct 2023 13:45:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651b2be2.630a0220.83a15.8198@mx.google.com> https://github.com/junior-jl updated https://github.com/llvm/llvm-project/pull/67019 >From c2396253b9584af9eabe1e67ed922f5f5f0e879c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Thu, 21 Sep 2023 11:12:58 -0300 Subject: [PATCH 1/3] [lldb] add stop-at-user-entry option to process launch [lldb] add command start [lldb] add stop-at-main option to process launch Revert "[lldb] add command start" This reverts commit 11270775865a8415e00b4d899703f84717344967. [lldb] remove shell modification | make bp one-shot add GetUserEntryPointName method | changed bp creating method use clang-format change option description | using set for entrypoint names use SetVector | change comments style replace const char* by StringRef | change if statements to return early --- lldb/include/lldb/Target/Language.h | 20 ++++++-- .../Commands/CommandOptionsProcessLaunch.cpp | 48 ++++++++++++++++++- lldb/source/Commands/Options.td | 4 ++ .../Language/CPlusPlus/CPlusPlusLanguage.h | 6 ++- .../Plugins/Language/ObjC/ObjCLanguage.h | 2 + .../ObjCPlusPlus/ObjCPlusPlusLanguage.h | 2 + 6 files changed, 73 insertions(+), 9 deletions(-) diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index a6b9ccaf31b3c42..cf781fc0e8dd5ee 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -95,21 +95,24 @@ class Language : public PluginInterface { class EitherTypeScavenger : public TypeScavenger { public: EitherTypeScavenger() : TypeScavenger() { - for (std::shared_ptr scavenger : { std::shared_ptr(new ScavengerTypes())... }) { + for (std::shared_ptr scavenger : + {std::shared_ptr(new ScavengerTypes())...}) { if (scavenger) m_scavengers.push_back(scavenger); } } + protected: bool Find_Impl(ExecutionContextScope *exe_scope, const char *key, ResultSet &results) override { const bool append = false; - for (auto& scavenger : m_scavengers) { + for (auto &scavenger : m_scavengers) { if (scavenger && scavenger->Find(exe_scope, key, results, append)) return true; } return false; } + private: std::vector> m_scavengers; }; @@ -118,22 +121,25 @@ class Language : public PluginInterface { class UnionTypeScavenger : public TypeScavenger { public: UnionTypeScavenger() : TypeScavenger() { - for (std::shared_ptr scavenger : { std::shared_ptr(new ScavengerTypes())... 
}) { + for (std::shared_ptr scavenger : + {std::shared_ptr(new ScavengerTypes())...}) { if (scavenger) m_scavengers.push_back(scavenger); } } + protected: bool Find_Impl(ExecutionContextScope *exe_scope, const char *key, ResultSet &results) override { const bool append = true; bool success = false; - for (auto& scavenger : m_scavengers) { + for (auto &scavenger : m_scavengers) { if (scavenger) success = scavenger->Find(exe_scope, key, results, append) || success; } return success; } + private: std::vector> m_scavengers; }; @@ -160,6 +166,10 @@ class Language : public PluginInterface { virtual lldb::LanguageType GetLanguageType() const = 0; + // Implement this function to return the user-defined entry point name + // for the language + virtual llvm::StringRef GetUserEntryPointName() const { return {}; } + virtual bool IsTopLevelFunction(Function &function); virtual bool IsSourceFile(llvm::StringRef file_path) const = 0; @@ -232,7 +242,7 @@ class Language : public PluginInterface { // a match. But we wouldn't want this to match AnotherA::my_function. The // user is specifying a truncated path, not a truncated set of characters. // This function does a language-aware comparison for those purposes. - virtual bool DemangledNameContainsPath(llvm::StringRef path, + virtual bool DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const; // if a language has a custom format for printing variable declarations that diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 85ad8ff5e07132c..2645b7bdd8c4ae6 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -8,6 +8,7 @@ #include "CommandOptionsProcessLaunch.h" +#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" @@ -15,11 +16,13 @@ #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" - #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; using namespace lldb; @@ -38,7 +41,48 @@ Status CommandOptionsProcessLaunch::SetOptionValue( case 's': // Stop at program entry point launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; - + case 'm': // Stop at user entry point + { + TargetSP target_sp = + execution_context ? 
execution_context->GetTargetSP() : TargetSP(); + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); + break; + } + std::string entryPointName = lang->GetUserEntryPointName().str(); + if (!entryPointName.empty()) + entryPointNamesSet.insert(entryPointName); + } + if (entryPointNamesSet.empty()) { + error.SetErrorString("No entry point name found\n"); + break; + } + BreakpointSP bp_sp = target_sp->CreateBreakpoint( + &shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware + ); + if (!bp_sp) { + error.SetErrorString("Breakpoint creation failed.\n"); + break; + } + bp_sp->SetOneShot(true); + break; + } case 'i': // STDIN for read only { FileAction action; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..dd4cf5c4dc043e7 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -675,6 +675,10 @@ let Command = "platform shell" in { let Command = "process launch" in { def process_launch_stop_at_entry : Option<"stop-at-entry", "s">, Desc<"Stop at the entry point of the program when launching a process.">; + def process_launch_stop_at_user_entry : Option<"stop-at-user-entry", "m">, + Desc<"Stop at the user entry point when launching a process. 
For C based " + "languages this will be the 'main' function, but this might differ for " + "other languages.">; def process_launch_disable_aslr : Option<"disable-aslr", "A">, Arg<"Boolean">, Desc<"Set whether to disable address space layout randomization when launching a process.">; def process_launch_plugin : Option<"plugin", "P">, Arg<"Plugin">, diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 7712a60b7795951..bde34e710a44d85 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -103,6 +103,8 @@ class CPlusPlusLanguage : public Language { return lldb::eLanguageTypeC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + std::unique_ptr GetTypeScavenger() override; lldb::TypeCategoryImplSP GetFormatters() override; @@ -129,8 +131,8 @@ class CPlusPlusLanguage : public Language { static llvm::StringRef GetPluginNameStatic() { return "cplusplus"; } bool SymbolNameFitsToLanguage(Mangled mangled) const override; - - bool DemangledNameContainsPath(llvm::StringRef path, + + bool DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const override; ConstString diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index bb8057846bb7c30..a50f4b036108d7a 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -127,6 +127,8 @@ class ObjCLanguage : public Language { return lldb::eLanguageTypeObjC; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + // Get all possible names for a method. 
Examples: // If method_name is "+[NSString(my_additions) myStringWithCString:]" // variant_names[0] => "+[NSString myStringWithCString:]" diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h index b7c71b5dbb1c991..1beab9348eb72e8 100644 --- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h @@ -27,6 +27,8 @@ class ObjCPlusPlusLanguage : public Language { return lldb::eLanguageTypeObjC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + llvm::StringRef GetNilReferenceSummaryString() override { return "nil"; } bool IsSourceFile(llvm::StringRef file_path) const override; >From 596ff3c8252288f791e2c2173443ce31c000b0f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Sun, 1 Oct 2023 16:50:55 -0300 Subject: [PATCH 2/3] implement CreateBreakpointAtUserEntry in Target.[cpp/h] --- lldb/include/lldb/Target/Target.h | 2 + .../Commands/CommandOptionsProcessLaunch.cpp | 37 +--------------- lldb/source/Target/Target.cpp | 42 +++++++++++++++++++ 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index e9e531d0e12640a..82a343ee03fb516 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -654,6 +654,8 @@ class Target : public std::enable_shared_from_this, lldb::BreakpointSP GetBreakpointByID(lldb::break_id_t break_id); + lldb::BreakpointSP CreateBreakpointAtUserEntry(); + // Use this to create a file and line breakpoint to a given module or all // module it is nullptr lldb::BreakpointSP CreateBreakpoint(const FileSpecList *containingModules, diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 2645b7bdd8c4ae6..3055e4ca45bd230 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -45,42 +45,7 @@ Status CommandOptionsProcessLaunch::SetOptionValue( { TargetSP target_sp = execution_context ? 
execution_context->GetTargetSP() : TargetSP(); - ModuleSP main_module_sp = target_sp->GetExecutableModule(); - FileSpecList shared_lib_filter; - shared_lib_filter.Append(main_module_sp->GetFileSpec()); - llvm::SetVector, - std::unordered_set> - entryPointNamesSet; - for (LanguageType lang_type : Language::GetSupportedLanguages()) { - Language *lang = Language::FindPlugin(lang_type); - if (!lang) { - error.SetErrorString("Language not found\n"); - break; - } - std::string entryPointName = lang->GetUserEntryPointName().str(); - if (!entryPointName.empty()) - entryPointNamesSet.insert(entryPointName); - } - if (entryPointNamesSet.empty()) { - error.SetErrorString("No entry point name found\n"); - break; - } - BreakpointSP bp_sp = target_sp->CreateBreakpoint( - &shared_lib_filter, - nullptr, // containingSourceFiles - entryPointNamesSet.takeVector(), - eFunctionNameTypeFull, // func_name_type_mask - eLanguageTypeUnknown, // language - 0, // offset - eLazyBoolNo, // skip_prologue - false, // internal - false // hardware - ); - if (!bp_sp) { - error.SetErrorString("Breakpoint creation failed.\n"); - break; - } - bp_sp->SetOneShot(true); + target_sp->CreateBreakpointAtUserEntry(); break; } case 'i': // STDIN for read only diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index f197b1b1aa097c3..013d72bda6308be 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); + return lldb::BreakpointSP(); + } + std::string entryPointName = lang->GetUserEntryPointName().str(); + if (!entryPointName.empty()) + entryPointNamesSet.insert(entryPointName); + } + if (entryPointNamesSet.empty()) { + error.SetErrorString("No entry point name found\n"); + return lldb::BreakpointSP(); + } + BreakpointSP bp_sp = + target_sp->CreateBreakpoint(&shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware + ); + if (!bp_sp) { + error.SetErrorString("Breakpoint creation failed.\n"); + return lldb::BreakpointSP(); + } + bp_sp->SetOneShot(true); + return bp_sp; +} + BreakpointSP Target::CreateSourceRegexBreakpoint( const FileSpecList *containingModules, const FileSpecList *source_file_spec_list, >From 8f473782ec881385c7af6d0dea611e0c00dee4e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Mon, 2 Oct 2023 17:45:03 -0300 Subject: [PATCH 3/3] add shell test for process launch -m --- .../Shell/Commands/command-process-launch-user-entry.test | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 lldb/test/Shell/Commands/command-process-launch-user-entry.test diff --git a/lldb/test/Shell/Commands/command-process-launch-user-entry.test b/lldb/test/Shell/Commands/command-process-launch-user-entry.test new file mode 100644 index 
000000000000000..32ef710fe567439 --- /dev/null +++ b/lldb/test/Shell/Commands/command-process-launch-user-entry.test @@ -0,0 +1,8 @@ +# RUN: %clang_host -g %S/Inputs/main.c -o %t +# RUN: %lldb %t -s %s -o exit | FileCheck %s + +process launch -m +# CHECK-LABEL: process launch -m +# CHECK: Process {{.*}} stopped +# CHECK: stop reason = one-shot breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c \ No newline at end of file From lldb-commits at lists.llvm.org Mon Oct 2 13:50:56 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Mon, 02 Oct 2023 13:50:56 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651b2d30.630a0220.1f4f.0892@mx.google.com> junior-jl wrote: > Could you file a new issue describing this behavior and pasting the link here. Done! https://github.com/llvm/llvm-project/issues/68035 > Your test looks fine to me, so we can move on with it to merge your PR. I'll update it after we fix that other bug. Great. Added the test in the latest commit. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Mon Oct 2 14:18:47 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:18:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b33b7.a70a0220.bd630.4a50@mx.google.com> https://github.com/jeffreytan81 updated https://github.com/llvm/llvm-project/pull/68012 >From e7e47a211ebaaa0f6380810b6573fadde12ca02d Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 10:53:17 -0700 Subject: [PATCH 1/2] Implement data formatters for LibStdC++ std::variant --- lldb/examples/synthetic/gnu_libstdcpp.py | 89 +++++++++++++++++++ .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 18 +++- .../libstdcpp/variant/Makefile | 5 ++ .../TestDataFormatterLibStdcxxVariant.py | 72 +++++++++++++++ .../libstdcpp/variant/main.cpp | 79 ++++++++++++++++ 5 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 825b7f3787a010d..7462db744674682 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -892,3 +892,92 @@ def update(self): except: pass return False + + +def VariantSummaryProvider(valobj, dict): + raw_obj = valobj.GetNonSyntheticValue() + index_obj = raw_obj.GetChildMemberWithName("_M_index") + data_obj = raw_obj.GetChildMemberWithName("_M_u") + if not ( + index_obj + and index_obj.IsValid() + and data_obj + and data_obj.IsValid() + ): + return "" + + def get_variant_npos_value(index_byte_size): + if index_byte_size == 1: + return 0xFF + elif index_byte_size == 2: + return 0xFFFF + else: + return 0xFFFFFFFF + + npos_value = get_variant_npos_value(index_obj.GetByteSize()) + index = index_obj.GetValueAsUnsigned(0) + if index == npos_value: + return " No Value" + + active_type = data_obj.GetType().GetTemplateArgumentType(index) + return f" Active Type = {active_type.GetDisplayTypeName()} " + + +class VariantSynthProvider: + def 
__init__(self, valobj, dict): + self.raw_obj = valobj.GetNonSyntheticValue() + self.is_valid = False + self.index = None + self.data_obj = None + + def update(self): + try: + self.index = self.raw_obj.GetChildMemberWithName( + "_M_index" + ).GetValueAsSigned(-1) + self.is_valid = self.index != -1 + self.data_obj = self.raw_obj.GetChildMemberWithName("_M_u") + except: + self.is_valid = False + return False + + def has_children(self): + return True + + def num_children(self): + return 1 if self.is_valid else 0 + + def get_child_index(self, name): + return 0 + + def get_child_at_index(self, index): + if not self.is_valid: + return None + cur = 0 + node = self.data_obj + while cur < self.index: + node = node.GetChildMemberWithName("_M_rest") + cur += 1 + + # _M_storage's type depends on variant field's type "_Type". + # 1. if '_Type' is literal type: _Type _M_storage. + # 2. otherwise, __gnu_cxx::__aligned_membuf<_Type> _M_storage. + # + # For 2. we have to cast it to underlying template _Type. + + value = node.GetChildMemberWithName("_M_first").GetChildMemberWithName( + "_M_storage" + ) + template_type = value.GetType().GetTemplateArgumentType(0) + + # Literal type will return None for GetTemplateArgumentType(0) + if ( + template_type + and "__gnu_cxx::__aligned_membuf" in value.GetType().GetDisplayTypeName() + and template_type.IsValid() + ): + value = value.Cast(template_type) + + if value.IsValid(): + return value.Clone("Value") + return None diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c1743a5e0a418dd..d8a30729b6d02e1 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -332,11 +332,11 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { // If we can't parse the incoming name, then just check that it contains path. 
if (m_parse_error) return m_full.GetStringRef().contains(path); - + llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), context, identifier); @@ -372,7 +372,7 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { return false; if (haystack.empty() || !isalnum(haystack.back())) return true; - + return false; } @@ -388,7 +388,7 @@ bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { return true; } -bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, +bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); @@ -1104,6 +1104,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + cpp_category_sp->AddTypeSynthetic( + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); @@ -1148,6 +1153,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP(new ScriptSummaryFormat( stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); + cpp_category_sp->AddTypeSummary( + "^std::variant<.+>$", eFormatterMatchRegex, + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile new file mode 100644 index 000000000000000..104f82809c7a35b --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBSTDCPP := 1 +CXXFLAGS_EXTRAS := -std=c++17 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py new file mode 100644 index 000000000000000..88be87a5469e196 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. 
+""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) + + (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp", False) + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + self.assertEqual(3 + 4, 7) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = int {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v1_ref", + substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v_v1", + substrs=[ + "v_v1 = Active Type = std::variant {", + "Value = Active Type = int {", + "Value = 12", + "}", + "}", + ], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = double {", "Value = 2", "}"], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v2", + substrs=["v2 = Active Type = double {", "Value = 2", "}"], + ) + + self.expect( + "frame variable v3", + substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], + ) + + self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) + + self.expect( + "frame variable v_many_types_no_value", + substrs=["v_many_types_no_value = No Value"], + ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp new file mode 100644 index 000000000000000..545318f9358b673 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include + +struct S { + operator int() { throw 42; } +}; + +int main() { + bool has_variant = true; + + printf("%d\n", has_variant); // break here + + std::variant v1; + std::variant &v1_ref = v1; + std::variant v2; + std::variant v3; + std::variant> v_v1; + std::variant v_no_value; + // The next variant has many types, meaning the type index does not fit in + // a byte and must be `unsigned short` instead of `unsigned char` when + // using the unstable libc++ ABI. With stable libc++ ABI, the type index + // is always just `unsigned int`. 
+  std::variant<
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int>
+      v_many_types_no_value;
+
+  v1 = 12; // v contains int
+  v_v1 = v1;
+  int i = std::get<int>(v1);
+  printf("%d\n", i); // break here
+
+  v2 = 2.0;
+  double d = std::get<double>(v2);
+  printf("%f\n", d);
+
+  v3 = 'A';
+  char c = std::get<char>(v3);
+  printf("%d\n", c);
+
+  // Checking v1 above and here to make sure we don't maintain the incorrect
+  // state when we change its value.
+  v1 = 2.0;
+  d = std::get<double>(v1);
+  printf("%f\n", d); // break here
+
+  try {
+    v_no_value.emplace<0>(S());
+  } catch (...)
{ + } + + printf("%zu\n", v_many_types_no_value.index()); + + return 0; // break here +} >From 53d20815eadd368ff1185af520bcb4aa75c9e96e Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 14:18:14 -0700 Subject: [PATCH 2/2] Fix formatter --- lldb/examples/synthetic/gnu_libstdcpp.py | 7 +------ .../Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 7462db744674682..29c926167fb440c 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -898,12 +898,7 @@ def VariantSummaryProvider(valobj, dict): raw_obj = valobj.GetNonSyntheticValue() index_obj = raw_obj.GetChildMemberWithName("_M_index") data_obj = raw_obj.GetChildMemberWithName("_M_u") - if not ( - index_obj - and index_obj.IsValid() - and data_obj - and data_obj.IsValid() - ): + if not (index_obj and index_obj.IsValid() and data_obj and data_obj.IsValid()): return "" def get_variant_npos_value(index_byte_size): diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index d8a30729b6d02e1..a285864ca2e1229 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -1105,10 +1105,10 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); cpp_category_sp->AddTypeSynthetic( - "^std::variant<.+>$", eFormatterMatchRegex, - SyntheticChildrenSP(new ScriptedSyntheticChildren( - stl_synth_flags, - "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); From lldb-commits at lists.llvm.org Mon Oct 2 14:19:02 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:19:02 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b33c6.650a0220.196ca.7bb6@mx.google.com> https://github.com/jeffreytan81 ready_for_review https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 14:20:08 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:20:08 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b3408.170a0220.da1b7.5829@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes This patch implements the data formatters for LibStdC++ `std::variant`. --- Full diff: https://github.com/llvm/llvm-project/pull/68012.diff 5 Files Affected: - (modified) lldb/examples/synthetic/gnu_libstdcpp.py (+84) - (modified) lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp (+14-4) - (added) lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile (+5) - (added) lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py (+72) - (added) lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp (+79) ``````````diff diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 825b7f3787a010d..29c926167fb440c 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -892,3 +892,87 @@ def update(self): except: pass return False + + +def VariantSummaryProvider(valobj, dict): + raw_obj = valobj.GetNonSyntheticValue() + index_obj = raw_obj.GetChildMemberWithName("_M_index") + data_obj = raw_obj.GetChildMemberWithName("_M_u") + if not (index_obj and index_obj.IsValid() and data_obj and data_obj.IsValid()): + return "" + + def get_variant_npos_value(index_byte_size): + if index_byte_size == 1: + return 0xFF + elif index_byte_size == 2: + return 0xFFFF + else: + return 0xFFFFFFFF + + npos_value = get_variant_npos_value(index_obj.GetByteSize()) + index = index_obj.GetValueAsUnsigned(0) + if index == npos_value: + return " No Value" + + active_type = data_obj.GetType().GetTemplateArgumentType(index) + return f" Active Type = {active_type.GetDisplayTypeName()} " + + +class VariantSynthProvider: + def __init__(self, valobj, dict): + self.raw_obj = valobj.GetNonSyntheticValue() + self.is_valid = False + self.index = None + self.data_obj = None + + def update(self): + try: + self.index = self.raw_obj.GetChildMemberWithName( + "_M_index" + ).GetValueAsSigned(-1) + self.is_valid = self.index != -1 + self.data_obj = self.raw_obj.GetChildMemberWithName("_M_u") + except: + self.is_valid = False + return False + + def has_children(self): + return True + + def num_children(self): + return 1 if self.is_valid else 0 + + def get_child_index(self, name): + return 0 + + def get_child_at_index(self, index): + if not self.is_valid: + return None + cur = 0 + node = self.data_obj + while cur < self.index: + node = node.GetChildMemberWithName("_M_rest") + cur += 1 + + # _M_storage's type depends on variant field's type "_Type". + # 1. if '_Type' is literal type: _Type _M_storage. + # 2. otherwise, __gnu_cxx::__aligned_membuf<_Type> _M_storage. + # + # For 2. we have to cast it to underlying template _Type. 
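+        # (For example, with std::variant<int, std::string> and index 1, the
+        # loop above stops at _M_u._M_rest, where _M_first._M_storage is a
+        # __gnu_cxx::__aligned_membuf<std::string>; the Cast() below then
+        # reinterprets that raw buffer as std::string.)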
+ + value = node.GetChildMemberWithName("_M_first").GetChildMemberWithName( + "_M_storage" + ) + template_type = value.GetType().GetTemplateArgumentType(0) + + # Literal type will return None for GetTemplateArgumentType(0) + if ( + template_type + and "__gnu_cxx::__aligned_membuf" in value.GetType().GetDisplayTypeName() + and template_type.IsValid() + ): + value = value.Cast(template_type) + + if value.IsValid(): + return value.Clone("Value") + return None diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c1743a5e0a418dd..a285864ca2e1229 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -332,11 +332,11 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { // If we can't parse the incoming name, then just check that it contains path. if (m_parse_error) return m_full.GetStringRef().contains(path); - + llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), context, identifier); @@ -372,7 +372,7 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { return false; if (haystack.empty() || !isalnum(haystack.back())) return true; - + return false; } @@ -388,7 +388,7 @@ bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { return true; } -bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, +bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); @@ -1104,6 +1104,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + cpp_category_sp->AddTypeSynthetic( + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); @@ -1148,6 +1153,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP(new ScriptSummaryFormat( stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); + cpp_category_sp->AddTypeSummary( + "^std::variant<.+>$", eFormatterMatchRegex, + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile new file mode 100644 index 000000000000000..104f82809c7a35b --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBSTDCPP := 1 +CXXFLAGS_EXTRAS := -std=c++17 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py new file mode 
100644 index 000000000000000..88be87a5469e196
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py
@@ -0,0 +1,72 @@
+"""
+Test lldb data formatter for LibStdC++ std::variant.
+"""
+
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+USE_LIBSTDCPP = "USE_LIBSTDCPP"
+
+
+class LibStdcxxVariantDataFormatterTestCase(TestBase):
+    @add_test_categories(["libstdcxx"])
+    def test_with_run_command(self):
+        """Test LibStdC++ std::variant data formatter works correctly."""
+        self.build(dictionary={USE_LIBSTDCPP: "1"})
+
+        (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.cpp", False)
+        )
+
+        lldbutil.continue_to_breakpoint(self.process, bkpt)
+        self.assertEqual(3 + 4, 7)
+
+        self.expect(
+            "frame variable v1",
+            substrs=["v1 = Active Type = int {", "Value = 12", "}"],
+        )
+
+        self.expect(
+            "frame variable v1_ref",
+            substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"],
+        )
+
+        self.expect(
+            "frame variable v_v1",
+            substrs=[
+                "v_v1 = Active Type = std::variant<int, double, char> {",
+                "Value = Active Type = int {",
+                "Value = 12",
+                "}",
+                "}",
+            ],
+        )
+
+        lldbutil.continue_to_breakpoint(self.process, bkpt)
+
+        self.expect(
+            "frame variable v1",
+            substrs=["v1 = Active Type = double {", "Value = 2", "}"],
+        )
+
+        lldbutil.continue_to_breakpoint(self.process, bkpt)
+
+        self.expect(
+            "frame variable v2",
+            substrs=["v2 = Active Type = double {", "Value = 2", "}"],
+        )
+
+        self.expect(
+            "frame variable v3",
+            substrs=["v3 = Active Type = char {", "Value = 'A'", "}"],
+        )
+
+        self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"])
+
+        self.expect(
+            "frame variable v_many_types_no_value",
+            substrs=["v_many_types_no_value = No Value"],
+        )
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp
new file mode 100644
index 000000000000000..545318f9358b673
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp
@@ -0,0 +1,79 @@
+#include <cstdio>
+#include <string>
+#include <variant>
+#include <vector>
+
+struct S {
+  operator int() { throw 42; }
+};
+
+int main() {
+  bool has_variant = true;
+
+  printf("%d\n", has_variant); // break here
+
+  std::variant<int, double, char> v1;
+  std::variant<int, double, char> &v1_ref = v1;
+  std::variant<int, double, char> v2;
+  std::variant<int, double, char> v3;
+  std::variant<std::variant<int, double, char>> v_v1;
+  std::variant<int, char> v_no_value;
+  // The next variant has many types, meaning the type index does not fit in
+  // a byte and must be `unsigned short` instead of `unsigned char` when
+  // using the unstable libc++ ABI. With stable libc++ ABI, the type index
+  // is always just `unsigned int`.
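+  // (When a variant is valueless by exception, _M_index holds the all-ones
+  // value for its width: 0xFF, 0xFFFF, or 0xFFFFFFFF. That sentinel is what
+  // the Python summary provider's get_variant_npos_value() checks against.)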
+  std::variant<
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int>
+      v_many_types_no_value;
+
+  v1 = 12; // v contains int
+  v_v1 = v1;
+  int i = std::get<int>(v1);
+  printf("%d\n", i); // break here
+
+  v2 = 2.0;
+  double d = std::get<double>(v2);
+  printf("%f\n", d);
+
+  v3 = 'A';
+  char c = std::get<char>(v3);
+  printf("%d\n", c);
+
+  // Checking v1 above and here to make sure we don't maintain the incorrect
+  // state when we change its value.
+  v1 = 2.0;
+  d = std::get<double>(v1);
+  printf("%f\n", d); // break here
+
+  try {
+    v_no_value.emplace<0>(S());
+  } catch (...) {
+  }
+
+  printf("%zu\n", v_no_value.index());
+
+  try {
+    v_many_types_no_value.emplace<0>(S());
+  } catch (...) {
+  }
+
+  printf("%zu\n", v_many_types_no_value.index());
+
+  return 0; // break here
+}
``````````

</details>
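For readers who have not looked inside GCC's `<variant>` header, the following self-contained sketch models the storage layout the two providers above walk. It is illustrative only and not part of the patch: the member names mirror the `_M_u` / `_M_first` / `_M_rest` / `_M_storage` chain used in the Python code, and the recursive `_Variadic_union` is unrolled by hand for a two-alternative variant.

```cpp
#include <cstdio>
#include <new>
#include <string>
#include <type_traits>

// _Uninitialized<T>: T is stored directly when trivially destructible,
// otherwise it lives in a raw aligned buffer (__gnu_cxx::__aligned_membuf's
// job in libstdc++). These are the two cases the provider distinguishes.
template <typename T, bool = std::is_trivially_destructible_v<T>>
struct Uninitialized {
  T storage; // case 1: _M_storage is the value itself
};
template <typename T>
struct Uninitialized<T, false> {
  alignas(T) unsigned char storage[sizeof(T)]; // case 2: membuf-like buffer
};

// _Variadic_union<int, std::string>, unrolled: `first` holds alternative 0,
// `rest` wraps the union of the remaining alternatives.
union VariadicUnion {
  Uninitialized<int> first; // _M_first
  union {
    Uninitialized<std::string> first; // _M_rest._M_first
  } rest; // _M_rest
};

struct VariantModel {
  VariadicUnion u;     // _M_u: the provider descends into `rest` index times
  unsigned char index; // _M_index: the all-ones value means "no value"
};

int main() {
  VariantModel v;
  new (&v.u.rest.first.storage) std::string("hello"); // activate alternative 1
  v.index = 1;
  // The provider's value.Cast(template_type) is this reinterpretation:
  auto *s = reinterpret_cast<std::string *>(v.u.rest.first.storage);
  std::printf("index=%u value=%s\n", static_cast<unsigned>(v.index), s->c_str());
  s->~basic_string(); // the variant manages lifetimes manually, much like this
  return 0;
}
```

The synthetic provider's `while` loop is the descent into `rest`, and the `__aligned_membuf` check followed by `Cast()` corresponds to the `reinterpret_cast` step above.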
https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 14:31:58 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:31:58 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b36ce.630a0220.2a194.29e3@mx.google.com> https://github.com/jeffreytan81 updated https://github.com/llvm/llvm-project/pull/68012 >From e7e47a211ebaaa0f6380810b6573fadde12ca02d Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 10:53:17 -0700 Subject: [PATCH 1/3] Implement data formatters for LibStdC++ std::variant --- lldb/examples/synthetic/gnu_libstdcpp.py | 89 +++++++++++++++++++ .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 18 +++- .../libstdcpp/variant/Makefile | 5 ++ .../TestDataFormatterLibStdcxxVariant.py | 72 +++++++++++++++ .../libstdcpp/variant/main.cpp | 79 ++++++++++++++++ 5 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 825b7f3787a010d..7462db744674682 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -892,3 +892,92 @@ def update(self): except: pass return False + + +def VariantSummaryProvider(valobj, dict): + raw_obj = valobj.GetNonSyntheticValue() + index_obj = raw_obj.GetChildMemberWithName("_M_index") + data_obj = raw_obj.GetChildMemberWithName("_M_u") + if not ( + index_obj + and index_obj.IsValid() + and data_obj + and data_obj.IsValid() + ): + return "" + + def get_variant_npos_value(index_byte_size): + if index_byte_size == 1: + return 0xFF + elif index_byte_size == 2: + return 0xFFFF + else: + return 0xFFFFFFFF + + npos_value = get_variant_npos_value(index_obj.GetByteSize()) + index = index_obj.GetValueAsUnsigned(0) + if index == npos_value: + return " No Value" + + active_type = data_obj.GetType().GetTemplateArgumentType(index) + return f" Active Type = {active_type.GetDisplayTypeName()} " + + +class VariantSynthProvider: + def __init__(self, valobj, dict): + self.raw_obj = valobj.GetNonSyntheticValue() + self.is_valid = False + self.index = None + self.data_obj = None + + def update(self): + try: + self.index = self.raw_obj.GetChildMemberWithName( + "_M_index" + ).GetValueAsSigned(-1) + self.is_valid = self.index != -1 + self.data_obj = self.raw_obj.GetChildMemberWithName("_M_u") + except: + self.is_valid = False + return False + + def has_children(self): + return True + + def num_children(self): + return 1 if self.is_valid else 0 + + def get_child_index(self, name): + return 0 + + def get_child_at_index(self, index): + if not self.is_valid: + return None + cur = 0 + node = self.data_obj + while cur < self.index: + node = node.GetChildMemberWithName("_M_rest") + cur += 1 + + # _M_storage's type depends on variant field's type "_Type". + # 1. if '_Type' is literal type: _Type _M_storage. + # 2. otherwise, __gnu_cxx::__aligned_membuf<_Type> _M_storage. + # + # For 2. we have to cast it to underlying template _Type. 
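+        # (For example, with std::variant<int, std::string> and index 1, the
+        # loop above stops at _M_u._M_rest, where _M_first._M_storage is a
+        # __gnu_cxx::__aligned_membuf<std::string>; the Cast() below then
+        # reinterprets that raw buffer as std::string.)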
+ + value = node.GetChildMemberWithName("_M_first").GetChildMemberWithName( + "_M_storage" + ) + template_type = value.GetType().GetTemplateArgumentType(0) + + # Literal type will return None for GetTemplateArgumentType(0) + if ( + template_type + and "__gnu_cxx::__aligned_membuf" in value.GetType().GetDisplayTypeName() + and template_type.IsValid() + ): + value = value.Cast(template_type) + + if value.IsValid(): + return value.Clone("Value") + return None diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c1743a5e0a418dd..d8a30729b6d02e1 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -332,11 +332,11 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { // If we can't parse the incoming name, then just check that it contains path. if (m_parse_error) return m_full.GetStringRef().contains(path); - + llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), context, identifier); @@ -372,7 +372,7 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { return false; if (haystack.empty() || !isalnum(haystack.back())) return true; - + return false; } @@ -388,7 +388,7 @@ bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { return true; } -bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, +bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); @@ -1104,6 +1104,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + cpp_category_sp->AddTypeSynthetic( + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); @@ -1148,6 +1153,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP(new ScriptSummaryFormat( stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); + cpp_category_sp->AddTypeSummary( + "^std::variant<.+>$", eFormatterMatchRegex, + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile new file mode 100644 index 000000000000000..104f82809c7a35b --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBSTDCPP := 1 +CXXFLAGS_EXTRAS := -std=c++17 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py new file mode 
100644 index 000000000000000..88be87a5469e196
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py
@@ -0,0 +1,72 @@
+"""
+Test lldb data formatter for LibStdC++ std::variant.
+"""
+
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+USE_LIBSTDCPP = "USE_LIBSTDCPP"
+
+
+class LibStdcxxVariantDataFormatterTestCase(TestBase):
+    @add_test_categories(["libstdcxx"])
+    def test_with_run_command(self):
+        """Test LibStdC++ std::variant data formatter works correctly."""
+        self.build(dictionary={USE_LIBSTDCPP: "1"})
+
+        (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.cpp", False)
+        )
+
+        lldbutil.continue_to_breakpoint(self.process, bkpt)
+        self.assertEqual(3 + 4, 7)
+
+        self.expect(
+            "frame variable v1",
+            substrs=["v1 = Active Type = int {", "Value = 12", "}"],
+        )
+
+        self.expect(
+            "frame variable v1_ref",
+            substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"],
+        )
+
+        self.expect(
+            "frame variable v_v1",
+            substrs=[
+                "v_v1 = Active Type = std::variant<int, double, char> {",
+                "Value = Active Type = int {",
+                "Value = 12",
+                "}",
+                "}",
+            ],
+        )
+
+        lldbutil.continue_to_breakpoint(self.process, bkpt)
+
+        self.expect(
+            "frame variable v1",
+            substrs=["v1 = Active Type = double {", "Value = 2", "}"],
+        )
+
+        lldbutil.continue_to_breakpoint(self.process, bkpt)
+
+        self.expect(
+            "frame variable v2",
+            substrs=["v2 = Active Type = double {", "Value = 2", "}"],
+        )
+
+        self.expect(
+            "frame variable v3",
+            substrs=["v3 = Active Type = char {", "Value = 'A'", "}"],
+        )
+
+        self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"])
+
+        self.expect(
+            "frame variable v_many_types_no_value",
+            substrs=["v_many_types_no_value = No Value"],
+        )
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp
new file mode 100644
index 000000000000000..545318f9358b673
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp
@@ -0,0 +1,79 @@
+#include <cstdio>
+#include <string>
+#include <variant>
+#include <vector>
+
+struct S {
+  operator int() { throw 42; }
+};
+
+int main() {
+  bool has_variant = true;
+
+  printf("%d\n", has_variant); // break here
+
+  std::variant<int, double, char> v1;
+  std::variant<int, double, char> &v1_ref = v1;
+  std::variant<int, double, char> v2;
+  std::variant<int, double, char> v3;
+  std::variant<std::variant<int, double, char>> v_v1;
+  std::variant<int, char> v_no_value;
+  // The next variant has many types, meaning the type index does not fit in
+  // a byte and must be `unsigned short` instead of `unsigned char` when
+  // using the unstable libc++ ABI. With stable libc++ ABI, the type index
+  // is always just `unsigned int`.
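+  // (When a variant is valueless by exception, _M_index holds the all-ones
+  // value for its width: 0xFF, 0xFFFF, or 0xFFFFFFFF. That sentinel is what
+  // the Python summary provider's get_variant_npos_value() checks against.)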
+  std::variant<
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+      int, int, int, int, int, int, int, int, int, int, int, int>
+      v_many_types_no_value;
+
+  v1 = 12; // v contains int
+  v_v1 = v1;
+  int i = std::get<int>(v1);
+  printf("%d\n", i); // break here
+
+  v2 = 2.0;
+  double d = std::get<double>(v2);
+  printf("%f\n", d);
+
+  v3 = 'A';
+  char c = std::get<char>(v3);
+  printf("%d\n", c);
+
+  // Checking v1 above and here to make sure we don't maintain the incorrect
+  // state when we change its value.
+  v1 = 2.0;
+  d = std::get<double>(v1);
+  printf("%f\n", d); // break here
+
+  try {
+    v_no_value.emplace<0>(S());
+  } catch (...)
{ + } + + printf("%zu\n", v_many_types_no_value.index()); + + return 0; // break here +} >From 53d20815eadd368ff1185af520bcb4aa75c9e96e Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 14:18:14 -0700 Subject: [PATCH 2/3] Fix formatter --- lldb/examples/synthetic/gnu_libstdcpp.py | 7 +------ .../Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 7462db744674682..29c926167fb440c 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -898,12 +898,7 @@ def VariantSummaryProvider(valobj, dict): raw_obj = valobj.GetNonSyntheticValue() index_obj = raw_obj.GetChildMemberWithName("_M_index") data_obj = raw_obj.GetChildMemberWithName("_M_u") - if not ( - index_obj - and index_obj.IsValid() - and data_obj - and data_obj.IsValid() - ): + if not (index_obj and index_obj.IsValid() and data_obj and data_obj.IsValid()): return "" def get_variant_npos_value(index_byte_size): diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index d8a30729b6d02e1..a285864ca2e1229 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -1105,10 +1105,10 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); cpp_category_sp->AddTypeSynthetic( - "^std::variant<.+>$", eFormatterMatchRegex, - SyntheticChildrenSP(new ScriptedSyntheticChildren( - stl_synth_flags, - "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); >From eebb06c9b893798a1f2b22502bf32dad48b644eb Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 14:31:36 -0700 Subject: [PATCH 3/3] Fix formatter again --- .../source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index a285864ca2e1229..ad6d627938c0520 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -336,10 +336,8 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success - = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), - context, - identifier); + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier( + path_str.c_str(), context, identifier); if (!success) return m_full.GetStringRef().contains(path); From lldb-commits at lists.llvm.org Mon Oct 2 14:34:01 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Mon, 02 Oct 2023 14:34:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651b3749.170a0220.a15db.00e0@mx.google.com> ================ @@ -0,0 +1,252 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// template S, class T, class Proj = identity> +// requires indirect_binary_predicate, const T*> +// constexpr bool ranges::contains(I first, S last, const T& value, Proj proj = {}); // since C++23 + +// template +// requires indirect_binary_predicate, Proj>, const T*> +// constexpr bool ranges::contains(R&& r, const T& value, Proj proj = {}); // since C++23 + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "boolean_testable.h" +#include "test_iterators.h" + +struct NotEqualityComparable {}; + +template +concept HasContainsIt = requires(Iter iter, Sent sent) { std::ranges::contains(iter, sent, *iter); }; + +static_assert(HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt, SentinelForNotSemiregular>); +static_assert(!HasContainsIt, InputRangeNotSentinelEqualityComparableWith>); +static_assert(!HasContainsIt, sentinel_wrapper>>); + +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(HasContainsIt); + +template +concept HasContainsR = requires(Range&& range) { std::ranges::contains(std::forward(range), ValT{}); }; + +static_assert(!HasContainsR); +static_assert(HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); + +static std::vector comparable_data; + +template +constexpr void test_iterators() { + using ValueT = std::iter_value_t; + { // simple tests + ValueT a[] = {1, 2, 3, 4, 5, 6}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 6))); + { + [[maybe_unused]] std::same_as decltype(auto) ret = ---------------- var-const wrote: We're using `ret` in the assert below, so I think this `maybe_unused` shouldn't be necessary. https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Mon Oct 2 14:34:15 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Mon, 02 Oct 2023 14:34:15 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651b3757.170a0220.f879e.f19d@mx.google.com> ================ @@ -0,0 +1,252 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// template S, class T, class Proj = identity> +// requires indirect_binary_predicate, const T*> +// constexpr bool ranges::contains(I first, S last, const T& value, Proj proj = {}); // since C++23 + +// template +// requires indirect_binary_predicate, Proj>, const T*> +// constexpr bool ranges::contains(R&& r, const T& value, Proj proj = {}); // since C++23 + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "boolean_testable.h" +#include "test_iterators.h" + +struct NotEqualityComparable {}; + +template +concept HasContainsIt = requires(Iter iter, Sent sent) { std::ranges::contains(iter, sent, *iter); }; + +static_assert(HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt, SentinelForNotSemiregular>); +static_assert(!HasContainsIt, InputRangeNotSentinelEqualityComparableWith>); +static_assert(!HasContainsIt, sentinel_wrapper>>); + +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(HasContainsIt); + +template +concept HasContainsR = requires(Range&& range) { std::ranges::contains(std::forward(range), ValT{}); }; + +static_assert(!HasContainsR); +static_assert(HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); + +static std::vector comparable_data; + +template +constexpr void test_iterators() { + using ValueT = std::iter_value_t; + { // simple tests + ValueT a[] = {1, 2, 3, 4, 5, 6}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 6))); + { + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::contains(whole.begin(), whole.end(), 3); + assert(ret); + } + { + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::contains(whole, 3); + assert(ret); + } + } + + { // check that a range with a single element works + ValueT a[] = {32}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 1))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // check that an empty range works + ValueT a[] = {}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 1); + assert(!ret); + } + { + bool ret = std::ranges::contains(whole, 1); + assert(!ret); + } + } + + { // check that the first element matches + ValueT a[] = {32, 3, 2, 1, 0, 23, 21, 9, 40, 100}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 10))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // check that the last element matches + ValueT a[] = {3, 22, 1, 43, 99, 0, 56, 100, 32}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 9))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // no match + ValueT a[] = {13, 1, 21, 4, 5}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + { + 
bool ret = std::ranges::contains(whole.begin(), whole.end(), 10); + assert(!ret); + } + { + bool ret = std::ranges::contains(whole, 10); + assert(!ret); + } + } + + { // check that the projections are used + int a[] = {1, 9, 0, 13, 25}; + { + bool ret = std::ranges::contains(a, a + 5, -13, [&](int i) { return i * -1; }); + assert(ret); + } + { + auto range = std::ranges::subrange(a, a + 5); + bool ret = std::ranges::contains(range, -13, [&](int i) { return i * -1; }); + assert(ret); + } + } + + { // check the nodiscard extension + // use #pragma around to suppress error: ignoring return value of function + // declared with 'nodiscard' attribute [-Werror,-Wunused-result] + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunused-result" + ValueT a[] = {1, 9, 0, 13, 25}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + std::ranges::contains(whole, 12); + #pragma clang diagnostic pop + } + + if (!std::is_constant_evaluated()) + comparable_data.clear(); +} + +template +class TriviallyComparable { + ElementT el_; + +public: + constexpr TriviallyComparable(ElementT el) : el_(el) {} + bool operator==(const TriviallyComparable&) const = default; +}; + +template +class Comparable { + IndexT index_; + +public: + Comparable(IndexT i) + : index_([&]() { + IndexT size = static_cast(comparable_data.size()); + comparable_data.push_back(i); + return size; + }()) {} + + bool operator==(const Comparable& other) const { + return comparable_data[other.index_] == comparable_data[index_]; + } + + friend bool operator==(const Comparable& lhs, long long rhs) { return comparable_data[lhs.index_] == rhs; } +}; + +constexpr bool test() { + types::for_each(types::type_list, TriviallyComparable>{}, + [] { + types::for_each(types::cpp20_input_iterator_list{}, + [] { + if constexpr (std::forward_iterator) + test_iterators(); + test_iterators>(); + test_iterators>(); + }); + }); + + { + int a[] = {1, 9, 0, 13, 25}; + int projection_count = 0; + // count invocations of the projection + { + bool ret = std::ranges::contains(a, a + 5, 0, + [&](int i) { ++projection_count; return i; }); + assert(ret); + assert(projection_count == 3); + } + { + projection_count = 0; ---------------- var-const wrote: Optional: I'd move this reset instead to be the last operation in the block above (IMO it's better for the part that modified the counter to do what is essentially cleanup). https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Mon Oct 2 14:34:18 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Mon, 02 Oct 2023 14:34:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651b375a.170a0220.1c403.f165@mx.google.com> ================ @@ -0,0 +1,252 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// template S, class T, class Proj = identity> +// requires indirect_binary_predicate, const T*> +// constexpr bool ranges::contains(I first, S last, const T& value, Proj proj = {}); // since C++23 + +// template +// requires indirect_binary_predicate, Proj>, const T*> +// constexpr bool ranges::contains(R&& r, const T& value, Proj proj = {}); // since C++23 + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "boolean_testable.h" +#include "test_iterators.h" + +struct NotEqualityComparable {}; + +template +concept HasContainsIt = requires(Iter iter, Sent sent) { std::ranges::contains(iter, sent, *iter); }; + +static_assert(HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt, SentinelForNotSemiregular>); +static_assert(!HasContainsIt, InputRangeNotSentinelEqualityComparableWith>); +static_assert(!HasContainsIt, sentinel_wrapper>>); + +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(HasContainsIt); + +template +concept HasContainsR = requires(Range&& range) { std::ranges::contains(std::forward(range), ValT{}); }; + +static_assert(!HasContainsR); +static_assert(HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); + +static std::vector comparable_data; + +template +constexpr void test_iterators() { + using ValueT = std::iter_value_t; + { // simple tests + ValueT a[] = {1, 2, 3, 4, 5, 6}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 6))); + { + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::contains(whole.begin(), whole.end(), 3); + assert(ret); + } + { + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::contains(whole, 3); + assert(ret); + } + } + + { // check that a range with a single element works + ValueT a[] = {32}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 1))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // check that an empty range works + ValueT a[] = {}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 1); + assert(!ret); + } + { + bool ret = std::ranges::contains(whole, 1); + assert(!ret); + } + } + + { // check that the first element matches + ValueT a[] = {32, 3, 2, 1, 0, 23, 21, 9, 40, 100}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 10))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // check that the last element matches + ValueT a[] = {3, 22, 1, 43, 99, 0, 56, 100, 32}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 9))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // no match + ValueT a[] = {13, 1, 21, 4, 5}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + { + 
bool ret = std::ranges::contains(whole.begin(), whole.end(), 10); + assert(!ret); + } + { + bool ret = std::ranges::contains(whole, 10); + assert(!ret); + } + } + + { // check that the projections are used + int a[] = {1, 9, 0, 13, 25}; + { + bool ret = std::ranges::contains(a, a + 5, -13, [&](int i) { return i * -1; }); + assert(ret); + } + { + auto range = std::ranges::subrange(a, a + 5); + bool ret = std::ranges::contains(range, -13, [&](int i) { return i * -1; }); + assert(ret); + } + } + + { // check the nodiscard extension + // use #pragma around to suppress error: ignoring return value of function + // declared with 'nodiscard' attribute [-Werror,-Wunused-result] + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunused-result" + ValueT a[] = {1, 9, 0, 13, 25}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + std::ranges::contains(whole, 12); + #pragma clang diagnostic pop + } + + if (!std::is_constant_evaluated()) + comparable_data.clear(); +} + +template +class TriviallyComparable { + ElementT el_; + +public: + constexpr TriviallyComparable(ElementT el) : el_(el) {} + bool operator==(const TriviallyComparable&) const = default; +}; + +template +class Comparable { + IndexT index_; + +public: + Comparable(IndexT i) + : index_([&]() { + IndexT size = static_cast(comparable_data.size()); + comparable_data.push_back(i); + return size; + }()) {} + + bool operator==(const Comparable& other) const { + return comparable_data[other.index_] == comparable_data[index_]; + } + + friend bool operator==(const Comparable& lhs, long long rhs) { return comparable_data[lhs.index_] == rhs; } +}; + +constexpr bool test() { + types::for_each(types::type_list, TriviallyComparable>{}, + [] { + types::for_each(types::cpp20_input_iterator_list{}, + [] { + if constexpr (std::forward_iterator) + test_iterators(); + test_iterators>(); + test_iterators>(); + }); + }); + + { + int a[] = {1, 9, 0, 13, 25}; + int projection_count = 0; + // count invocations of the projection + { + bool ret = std::ranges::contains(a, a + 5, 0, + [&](int i) { ++projection_count; return i; }); + assert(ret); + assert(projection_count == 3); + } + { + projection_count = 0; + auto range = std::ranges::subrange(a, a + 5); + bool ret = std::ranges::contains(range, 0, [&](int i) { ++projection_count; return i; }); ---------------- var-const wrote: Nit: we can just pass `a` here, no need to wrap it in a `subrange`. https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Mon Oct 2 14:34:20 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Mon, 02 Oct 2023 14:34:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651b375c.170a0220.dbf46.fee2@mx.google.com> ================ @@ -0,0 +1,252 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// template S, class T, class Proj = identity> +// requires indirect_binary_predicate, const T*> +// constexpr bool ranges::contains(I first, S last, const T& value, Proj proj = {}); // since C++23 + +// template +// requires indirect_binary_predicate, Proj>, const T*> +// constexpr bool ranges::contains(R&& r, const T& value, Proj proj = {}); // since C++23 + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "boolean_testable.h" +#include "test_iterators.h" + +struct NotEqualityComparable {}; + +template +concept HasContainsIt = requires(Iter iter, Sent sent) { std::ranges::contains(iter, sent, *iter); }; + +static_assert(HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt, SentinelForNotSemiregular>); +static_assert(!HasContainsIt, InputRangeNotSentinelEqualityComparableWith>); +static_assert(!HasContainsIt, sentinel_wrapper>>); + +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(HasContainsIt); + +template +concept HasContainsR = requires(Range&& range) { std::ranges::contains(std::forward(range), ValT{}); }; + +static_assert(!HasContainsR); +static_assert(HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); + +static std::vector comparable_data; + +template +constexpr void test_iterators() { + using ValueT = std::iter_value_t; + { // simple tests + ValueT a[] = {1, 2, 3, 4, 5, 6}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 6))); + { + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::contains(whole.begin(), whole.end(), 3); + assert(ret); + } + { + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::contains(whole, 3); + assert(ret); + } + } + + { // check that a range with a single element works + ValueT a[] = {32}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 1))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // check that an empty range works + ValueT a[] = {}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 1); + assert(!ret); + } + { + bool ret = std::ranges::contains(whole, 1); + assert(!ret); + } + } + + { // check that the first element matches + ValueT a[] = {32, 3, 2, 1, 0, 23, 21, 9, 40, 100}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 10))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // check that the last element matches + ValueT a[] = {3, 22, 1, 43, 99, 0, 56, 100, 32}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 9))); + { + bool ret = std::ranges::contains(whole.begin(), whole.end(), 32); + assert(ret); + } + { + bool ret = std::ranges::contains(whole, 32); + assert(ret); + } + } + + { // no match + ValueT a[] = {13, 1, 21, 4, 5}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + { + 
bool ret = std::ranges::contains(whole.begin(), whole.end(), 10); + assert(!ret); + } + { + bool ret = std::ranges::contains(whole, 10); + assert(!ret); + } + } + + { // check that the projections are used + int a[] = {1, 9, 0, 13, 25}; + { + bool ret = std::ranges::contains(a, a + 5, -13, [&](int i) { return i * -1; }); + assert(ret); + } + { + auto range = std::ranges::subrange(a, a + 5); + bool ret = std::ranges::contains(range, -13, [&](int i) { return i * -1; }); + assert(ret); + } + } + + { // check the nodiscard extension + // use #pragma around to suppress error: ignoring return value of function + // declared with 'nodiscard' attribute [-Werror,-Wunused-result] + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunused-result" + ValueT a[] = {1, 9, 0, 13, 25}; + auto whole = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + std::ranges::contains(whole, 12); + #pragma clang diagnostic pop + } + + if (!std::is_constant_evaluated()) + comparable_data.clear(); +} + +template +class TriviallyComparable { + ElementT el_; + +public: + constexpr TriviallyComparable(ElementT el) : el_(el) {} + bool operator==(const TriviallyComparable&) const = default; +}; + +template +class Comparable { + IndexT index_; + +public: + Comparable(IndexT i) + : index_([&]() { + IndexT size = static_cast(comparable_data.size()); + comparable_data.push_back(i); + return size; + }()) {} + + bool operator==(const Comparable& other) const { + return comparable_data[other.index_] == comparable_data[index_]; + } + + friend bool operator==(const Comparable& lhs, long long rhs) { return comparable_data[lhs.index_] == rhs; } +}; + +constexpr bool test() { + types::for_each(types::type_list, TriviallyComparable>{}, + [] { + types::for_each(types::cpp20_input_iterator_list{}, ---------------- var-const wrote: Nit: trailing whitespace. https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Mon Oct 2 14:34:27 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Mon, 02 Oct 2023 14:34:27 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651b3763.170a0220.3fac0.e80a@mx.google.com> https://github.com/var-const unassigned https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Mon Oct 2 14:46:47 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:46:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) Message-ID: https://github.com/shraiysh created https://github.com/llvm/llvm-project/pull/68042 This patch adds translation from `omp.teams` operation to LLVM IR using OpenMPIRBuilder. The clauses are not handled in this patch. >From c7c9e907d897ae667331761d8097ccb7852c5d93 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 16:43:13 -0500 Subject: [PATCH] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR This patch adds translation from `omp.teams` operation to LLVM IR using OpenMPIRBuilder. The clauses are not handled in this patch. 
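A rough host-side picture may help before the diff: `createTeams` outlines the region into a function whose signature matches the runtime's microtask convention (global and bound thread-id pointers, then the captured values), and replaces the region with a `__kmpc_fork_teams` call. The C++ sketch below is illustrative only, not part of the patch; the stub stands in for the real runtime entry point and the `ident_t` location argument is omitted.

```cpp
#include <cstdio>

// Captured values are packed into one struct, matching the { i32 } structArg
// the tests below check for.
struct SharedArgs {
  int arg0;
};

static void foo(int x) { std::printf("foo(%d) from a team\n", x); }

// The outlined region. Its signature mirrors the wrapper the runtime expects:
// (ptr %global_tid, ptr %bound_tid, shared args...).
static void teams_outlined(int *global_tid, int *bound_tid, SharedArgs *args) {
  (void)global_tid;
  (void)bound_tid;
  foo(args->arg0); // region body
}

// Stand-in for __kmpc_fork_teams: the real runtime launches a league of teams
// and invokes the microtask once per team; one call suffices for the sketch.
static void fork_teams_stub(int /*nargs*/,
                            void (*microtask)(int *, int *, SharedArgs *),
                            SharedArgs *shared) {
  int gtid = 0, btid = 0;
  microtask(&gtid, &btid, shared);
}

// Host-side shape of omp_teams_shared_simple from the test below: store the
// captured value into the struct, then hand the struct to the fork call.
void omp_teams_shared_simple(int arg0) {
  SharedArgs s{arg0};
  fork_teams_stub(/*nargs=*/1, teams_outlined, &s);
}

int main() {
  omp_teams_shared_simple(42);
  return 0;
}
```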
--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 +++ mlir/test/Target/LLVMIR/openmp-teams.mlir | 136 ++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/openmp-teams.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8f7f1963b3e5a4f..b9643be40e13c01 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -661,6 +661,24 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, return bodyGenStatus; } +// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder +static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult bodyGenStatus = success(); + if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + return op.emitError("unhandled clauses for translation to LLVM IR"); + } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ + LLVM::ModuleTranslation::SaveStack frame(moduleTranslation, allocaIP); + builder.restoreIP(codegenIP); + convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + }; + + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + return bodyGenStatus; +} + /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, @@ -2406,6 +2424,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::SingleOp op) { return convertOmpSingle(op, builder, moduleTranslation); }) + .Case([&](omp::TeamsOp op) { + return convertOmpTeams(op, builder, moduleTranslation); + }) .Case([&](omp::TaskOp op) { return convertOmpTaskOp(op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir new file mode 100644 index 000000000000000..c9005fca94a7c20 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -0,0 +1,136 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @foo() + +// CHECK-LABEL: @omp_teams_simple +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[wrapperfn:.+]]) +// CHECK: ret void +llvm.func @omp_teams_simple() { + omp.teams { + llvm.call @foo() : () -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[outlinedfn:.+]]() +// CHECK: call void @foo() +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr %[[global_tid:.+]], ptr %[[bound_tid:.+]]) +// CHECK: call void @[[outlinedfn]] +// CHECK: ret void + +// ----- + +llvm.func @foo(i32) -> () + +// CHECK-LABEL: @omp_teams_shared_simple +// CHECK-SAME: (i32 [[arg0:%.+]]) +// CHECK: [[structArg:%.+]] = alloca { i32 } +// CHECK: br +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: store i32 [[arg0]], ptr [[gep]] +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:.+]], ptr [[structArg]]) +// CHECK: ret void +llvm.func @omp_teams_shared_simple(%arg0: i32) { + omp.teams { + llvm.call 
@foo(%arg0) : (i32) -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void [[outlinedfn:@.+]](ptr [[structArg:%.+]]) +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: [[loadgep:%.+]] = load i32, ptr [[gep]] +// CHECK: call void @foo(i32 [[loadgep]]) +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr [[global_tid:.+]], ptr [[bound_tid:.+]], ptr [[structArg:.+]]) +// CHECK: call void [[outlinedfn]](ptr [[structArg]]) +// CHECK: ret void + +// ----- + +llvm.func @my_alloca_fn() -> !llvm.ptr +llvm.func @foo(i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () +llvm.func @bar() + +// CHECK-LABEL: @omp_teams_branching_shared +// CHECK-SAME: (i1 [[condition:%.+]], i32 [[arg0:%.+]], float [[arg1:%.+]], ptr [[arg2:%.+]], fp128 [[arg3:%.+]]) + +// Checking that the allocation for struct argument happens in the alloca block. +// CHECK: [[structArg:%.+]] = alloca { i1, i32, float, ptr, fp128, ptr, i32 } +// CHECK: [[allocated:%.+]] = call ptr @my_alloca_fn() +// CHECK: [[loaded:%.+]] = load i32, ptr [[allocated]] +// CHECK: br label + +// Checking that the shared values are stored properly in the struct arg. +// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]] +// CHECK: store i1 [[condition]], ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 1 +// CHECK: store i32 [[arg0]], ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 2 +// CHECK: store float [[arg1]], ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 3 +// CHECK: store ptr [[arg2]], ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 4 +// CHECK: store fp128 [[arg3]], ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 5 +// CHECK: store ptr [[allocated]], ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 6 +// CHECK: store i32 [[loaded]], ptr [[loadedPtr]] + +// Runtime call. +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:@.+]], ptr [[structArg]]) +// CHECK: br label +// CHECK: call void @bar() +// CHECK: ret void +llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %arg2: !llvm.ptr, %arg3: f128) { + %allocated = llvm.call @my_alloca_fn(): () -> !llvm.ptr + %loaded = llvm.load %allocated : !llvm.ptr + llvm.br ^codegenBlock +^codegenBlock: + omp.teams { + llvm.cond_br %condition, ^true_block, ^false_block + ^true_block: + llvm.call @foo(%arg0, %arg1, %arg2, %arg3, %allocated, %loaded) : (i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () + llvm.br ^exit + ^false_block: + llvm.br ^exit + ^exit: + omp.terminator + } + llvm.call @bar() : () -> () + llvm.return +} + +// Check the outlined function. 
+// CHECK: define internal void [[outlinedfn:@.+]](ptr [[data:%.+]]) +// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[data]] +// CHECK: [[condition:%.+]] = load i1, ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 1 +// CHECK: [[arg0:%.+]] = load i32, ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 2 +// CHECK: [[arg1:%.+]] = load float, ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 3 +// CHECK: [[arg2:%.+]] = load ptr, ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 4 +// CHECK: [[arg3:%.+]] = load fp128, ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 5 +// CHECK: [[allocated:%.+]] = load ptr, ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 6 +// CHECK: [[loaded:%.+]] = load i32, ptr [[loadedPtr]] +// CHECK: br label + +// CHECK: br i1 [[condition]], label %[[true:.+]], label %[[false:.+]] +// CHECK: [[false]]: +// CHECK-NEXT: br label +// CHECK: [[true]]: +// CHECK: call void @foo(i32 [[arg0]], float [[arg1]], ptr [[arg2]], fp128 [[arg3]], ptr [[allocated]], i32 [[loaded]]) +// CHECK-NEXT: br label +// CHECK: ret void + +// Check the wrapper function +// CHECK: define void [[wrapperfn]](ptr [[globalTID:%.+]], ptr [[boundTID:%.+]], ptr [[data:%.+]]) +// CHECK: call void [[outlinedfn]](ptr [[data]]) +// CHECK: ret void From lldb-commits at lists.llvm.org Mon Oct 2 14:48:00 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:48:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651b3a90.050a0220.4fae8.4c4a@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-flang-openmp
Changes This patch adds translation from `omp.teams` operation to LLVM IR using OpenMPIRBuilder. The clauses are not handled in this patch. --- Full diff: https://github.com/llvm/llvm-project/pull/68042.diff 2 Files Affected: - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+21) - (added) mlir/test/Target/LLVMIR/openmp-teams.mlir (+136) ``````````diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8f7f1963b3e5a4f..b9643be40e13c01 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -661,6 +661,24 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, return bodyGenStatus; } +// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder +static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult bodyGenStatus = success(); + if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + return op.emitError("unhandled clauses for translation to LLVM IR"); + } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ + LLVM::ModuleTranslation::SaveStack frame(moduleTranslation, allocaIP); + builder.restoreIP(codegenIP); + convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + }; + + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + return bodyGenStatus; +} + /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. 
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, @@ -2406,6 +2424,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::SingleOp op) { return convertOmpSingle(op, builder, moduleTranslation); }) + .Case([&](omp::TeamsOp op) { + return convertOmpTeams(op, builder, moduleTranslation); + }) .Case([&](omp::TaskOp op) { return convertOmpTaskOp(op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir new file mode 100644 index 000000000000000..c9005fca94a7c20 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -0,0 +1,136 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @foo() + +// CHECK-LABEL: @omp_teams_simple +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[wrapperfn:.+]]) +// CHECK: ret void +llvm.func @omp_teams_simple() { + omp.teams { + llvm.call @foo() : () -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[outlinedfn:.+]]() +// CHECK: call void @foo() +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr %[[global_tid:.+]], ptr %[[bound_tid:.+]]) +// CHECK: call void @[[outlinedfn]] +// CHECK: ret void + +// ----- + +llvm.func @foo(i32) -> () + +// CHECK-LABEL: @omp_teams_shared_simple +// CHECK-SAME: (i32 [[arg0:%.+]]) +// CHECK: [[structArg:%.+]] = alloca { i32 } +// CHECK: br +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: store i32 [[arg0]], ptr [[gep]] +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:.+]], ptr [[structArg]]) +// CHECK: ret void +llvm.func @omp_teams_shared_simple(%arg0: i32) { + omp.teams { + llvm.call @foo(%arg0) : (i32) -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void [[outlinedfn:@.+]](ptr [[structArg:%.+]]) +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: [[loadgep:%.+]] = load i32, ptr [[gep]] +// CHECK: call void @foo(i32 [[loadgep]]) +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr [[global_tid:.+]], ptr [[bound_tid:.+]], ptr [[structArg:.+]]) +// CHECK: call void [[outlinedfn]](ptr [[structArg]]) +// CHECK: ret void + +// ----- + +llvm.func @my_alloca_fn() -> !llvm.ptr +llvm.func @foo(i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () +llvm.func @bar() + +// CHECK-LABEL: @omp_teams_branching_shared +// CHECK-SAME: (i1 [[condition:%.+]], i32 [[arg0:%.+]], float [[arg1:%.+]], ptr [[arg2:%.+]], fp128 [[arg3:%.+]]) + +// Checking that the allocation for struct argument happens in the alloca block. +// CHECK: [[structArg:%.+]] = alloca { i1, i32, float, ptr, fp128, ptr, i32 } +// CHECK: [[allocated:%.+]] = call ptr @my_alloca_fn() +// CHECK: [[loaded:%.+]] = load i32, ptr [[allocated]] +// CHECK: br label + +// Checking that the shared values are stored properly in the struct arg. 
+// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]] +// CHECK: store i1 [[condition]], ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 1 +// CHECK: store i32 [[arg0]], ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 2 +// CHECK: store float [[arg1]], ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 3 +// CHECK: store ptr [[arg2]], ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 4 +// CHECK: store fp128 [[arg3]], ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 5 +// CHECK: store ptr [[allocated]], ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 6 +// CHECK: store i32 [[loaded]], ptr [[loadedPtr]] + +// Runtime call. +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:@.+]], ptr [[structArg]]) +// CHECK: br label +// CHECK: call void @bar() +// CHECK: ret void +llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %arg2: !llvm.ptr, %arg3: f128) { + %allocated = llvm.call @my_alloca_fn(): () -> !llvm.ptr + %loaded = llvm.load %allocated : !llvm.ptr + llvm.br ^codegenBlock +^codegenBlock: + omp.teams { + llvm.cond_br %condition, ^true_block, ^false_block + ^true_block: + llvm.call @foo(%arg0, %arg1, %arg2, %arg3, %allocated, %loaded) : (i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () + llvm.br ^exit + ^false_block: + llvm.br ^exit + ^exit: + omp.terminator + } + llvm.call @bar() : () -> () + llvm.return +} + +// Check the outlined function. +// CHECK: define internal void [[outlinedfn:@.+]](ptr [[data:%.+]]) +// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[data]] +// CHECK: [[condition:%.+]] = load i1, ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 1 +// CHECK: [[arg0:%.+]] = load i32, ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 2 +// CHECK: [[arg1:%.+]] = load float, ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 3 +// CHECK: [[arg2:%.+]] = load ptr, ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 4 +// CHECK: [[arg3:%.+]] = load fp128, ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 5 +// CHECK: [[allocated:%.+]] = load ptr, ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 6 +// CHECK: [[loaded:%.+]] = load i32, ptr [[loadedPtr]] +// CHECK: br label + +// CHECK: br i1 [[condition]], label %[[true:.+]], label %[[false:.+]] +// CHECK: [[false]]: +// CHECK-NEXT: br label +// CHECK: [[true]]: +// CHECK: call void @foo(i32 [[arg0]], float [[arg1]], ptr [[arg2]], fp128 [[arg3]], ptr [[allocated]], i32 [[loaded]]) +// CHECK-NEXT: br label +// CHECK: ret void + +// Check the wrapper function +// CHECK: define void [[wrapperfn]](ptr [[globalTID:%.+]], ptr [[boundTID:%.+]], ptr [[data:%.+]]) +// CHECK: call void [[outlinedfn]](ptr [[data]]) +// CHECK: ret void ``````````
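The CHECK lines above pin down the calling convention `createTeams` produces: the region body lands in an internal outlined function, a small wrapper with the runtime-mandated `(ptr %global_tid, ptr %bound_tid, ...)` signature calls it, and captured values travel through a single struct argument to `__kmpc_fork_teams`. For readers who want to poke at this outside MLIR, the builder can be driven directly from C++ in the style of the OpenMPIRBuilder unit tests; this is a sketch only, and `M`, `BB`, and `Foo` are placeholder names assumed to exist in the surrounding fixture:

``````````cpp
// Sketch, loosely modeled on llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp.
// M (Module&), BB (entry BasicBlock*), and Foo (Function*) are assumptions.
llvm::OpenMPIRBuilder OMPBuilder(M);
OMPBuilder.initialize();

llvm::IRBuilder<> Builder(BB);
llvm::OpenMPIRBuilder::LocationDescription Loc(Builder);

auto BodyGenCB = [&](llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                     llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
  // Whatever is built here ends up in the outlined function, exactly as the
  // CHECK lines above verify for the MLIR path.
  Builder.restoreIP(CodeGenIP);
  Builder.CreateCall(Foo);
};

// Emits the struct packing, the wrapper, and the __kmpc_fork_teams call.
Builder.restoreIP(OMPBuilder.createTeams(Loc, BodyGenCB));
``````````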
https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Mon Oct 2 14:59:06 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 14:59:06 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651b3d2a.a70a0220.2f724.92a9@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff 457f582ffe23e951380bc345c4c96ec053c09681 dc0304897ba3f8731bb6062f357fe885ca7e584b -- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp ``````````
View the diff from clang-format here. ``````````diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b9643be40e13..9eefd1be8ad0 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -662,20 +662,27 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, } // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder -static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { +static LogicalResult +convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; LogicalResult bodyGenStatus = success(); - if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + if (op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || + op.getThreadLimit() || !op.getAllocatorsVars().empty() || + op.getReductions()) { return op.emitError("unhandled clauses for translation to LLVM IR"); } - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ - LLVM::ModuleTranslation::SaveStack frame(moduleTranslation, allocaIP); + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocaIP); builder.restoreIP(codegenIP); - convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, + moduleTranslation, bodyGenStatus); }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + builder.restoreIP( + moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); return bodyGenStatus; } ``````````
https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Mon Oct 2 15:00:55 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 15:00:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651b3d97.170a0220.19bb.042d@mx.google.com> =?utf-8?q?Jos=C3=A9?= L. Junior , =?utf-8?q?Jos=C3=A9?= L. Junior Message-ID: In-Reply-To: medismailben wrote: @clayborg what do you think ? Can we merge this ? https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Mon Oct 2 15:17:34 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Mon, 02 Oct 2023 15:17:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651b417e.050a0220.2bd3b.d4cb@mx.google.com> ================ @@ -0,0 +1,190 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// template S, class T, class Proj = identity> +// requires indirect_binary_predicate, const T*> +// constexpr bool ranges::contains(I first, S last, const T& value, Proj proj = {}); // since C++23 + +// template +// requires indirect_binary_predicate, Proj>, const T*> +// constexpr bool ranges::contains(R&& r, const T& value, Proj proj = {}); // since C++23 + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "boolean_testable.h" +#include "test_iterators.h" + +struct NotEqualityComparable {}; + +template +concept HasContainsIt = requires(Iter iter, Sent sent) { std::ranges::contains(iter, sent, *iter); }; + +static_assert(HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); +static_assert(!HasContainsIt, SentinelForNotSemiregular>); +static_assert(!HasContainsIt, InputRangeNotSentinelEqualityComparableWith>); + +static_assert(!HasContainsIt); +static_assert(!HasContainsIt); + +template +concept HasContainsR = requires(Range range) { std::ranges::contains(range, ValT{}); }; + +static_assert(HasContainsR, int>); +static_assert(!HasContainsR); +static_assert(!HasContainsR, NotEqualityComparable>); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); +static_assert(!HasContainsR); + +static std::vector comparable_data; + +// clang-format off +template +constexpr void test_iterators() { + using ValueT = std::iter_value_t; + { // simple tests + { + ValueT a[] = {1, 2, 3, 4, 5, 6}; + std::same_as auto ret = + std::ranges::contains(Iter(a), Sent(Iter(a + 6)), 3); + assert(ret); + } + { + ValueT a[] = {1, 2, 3, 4, 5, 6}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 6))); + std::same_as decltype(auto) ret = + std::ranges::contains(range, 3); + assert(ret); + } + } + + { // check that an empty range works + { + ValueT a[] = {}; + auto ret = std::ranges::contains(Iter(a), Sent(Iter(a)), 1); + assert(!ret); + } + { + ValueT a[] = {}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a))); + auto ret = 
std::ranges::contains(range, 1); + assert(!ret); + } + } + + { // check that no match + { + ValueT a[] = {13, 1, 21, 4, 5}; + auto ret = std::ranges::contains(Iter(a), Sent(Iter(a + 5)), 10); + assert(!ret); + } + { + ValueT a[] = {13, 1, 21, 4, 5}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + auto ret = std::ranges::contains(range, 10); + assert(!ret); + } + } + + if (!std::is_constant_evaluated()) + comparable_data.clear(); +} +template +class TriviallyComparable { ---------------- var-const wrote: Thanks for pointing me to the patch. It looks like this is to test an optimization that's specific to `find`. IMO we shouldn't duplicate those tests here, so I'd just remove the `Comparable` and `TriviallyComparable` tests. It might make sense to add a benchmark for `ranges::contains`, though. https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Mon Oct 2 15:17:35 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 15:17:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b417f.170a0220.68cd5.d0f3@mx.google.com> medismailben wrote: LGTM with some comments. https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 15:18:13 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 15:18:13 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b41a5.170a0220.a2299.07ce@mx.google.com> https://github.com/medismailben edited https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 15:18:13 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 15:18:13 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b41a5.170a0220.7a3f.0e0b@mx.google.com> https://github.com/medismailben approved this pull request. https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 15:18:14 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 15:18:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b41a6.170a0220.1efb6.10ae@mx.google.com> ================ @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. +""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) ---------------- medismailben wrote: Do you need to specify this since you already set `USE_LIBSTDCPP` in the Makefile ? 
https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 15:18:14 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 15:18:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b41a6.a70a0220.a6dda.5e57@mx.google.com> ================ @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. +""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) + + (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp", False) + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + self.assertEqual(3 + 4, 7) ---------------- medismailben wrote: What's the point of this ? https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 15:27:49 2023 From: lldb-commits at lists.llvm.org (Ed Maste via lldb-commits) Date: Mon, 02 Oct 2023 15:27:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651b43e5.a70a0220.6bc75.6d97@mx.google.com> emaste wrote: Hmm, when I attempt to close it via the GitHub UI I get: ![image](https://github.com/llvm/llvm-project/assets/1034582/54534c5d-9873-40c4-81af-45c50919dc92) I think I can just fetch the commit locally and then push it to the tree https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Mon Oct 2 16:39:46 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Mon, 02 Oct 2023 16:39:46 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) Message-ID: https://github.com/JDevlieghere created https://github.com/llvm/llvm-project/pull/68050 Expose Platform::Attach through the SB API. rdar://116188959 >From 6cf631f5acf3eb18e7cf12a2b996c9f974a360e2 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Sun, 1 Oct 2023 20:48:50 -0700 Subject: [PATCH] [lldb] Expose Platform::Attach through the SB API Expose Platform::Attach through the SB API. 
rdar://116188959 --- lldb/include/lldb/API/SBAttachInfo.h | 1 + lldb/include/lldb/API/SBDebugger.h | 1 + lldb/include/lldb/API/SBPlatform.h | 5 ++ lldb/include/lldb/API/SBProcess.h | 1 + .../Python/lldbsuite/test/gdbclientutils.py | 6 ++ lldb/source/API/SBPlatform.cpp | 25 ++++++++ .../gdb_remote_client/TestPlatformAttach.py | 58 +++++++++++++++++++ 7 files changed, 97 insertions(+) create mode 100644 lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py diff --git a/lldb/include/lldb/API/SBAttachInfo.h b/lldb/include/lldb/API/SBAttachInfo.h index ea1145e625856f0..c18655fee77e0ac 100644 --- a/lldb/include/lldb/API/SBAttachInfo.h +++ b/lldb/include/lldb/API/SBAttachInfo.h @@ -197,6 +197,7 @@ class LLDB_API SBAttachInfo { protected: friend class SBTarget; + friend class SBPlatform; friend class lldb_private::ScriptInterpreter; diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 29cf2c16fad4bd7..218113a7a391f35 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -487,6 +487,7 @@ class LLDB_API SBDebugger { friend class SBProcess; friend class SBSourceManager; friend class SBStructuredData; + friend class SBPlatform; friend class SBTarget; friend class SBTrace; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index 6567277a5d161e7..614ee3202def5bc 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -10,6 +10,7 @@ #define LLDB_API_SBPLATFORM_H #include "lldb/API/SBDefines.h" +#include "lldb/API/SBProcess.h" #include @@ -19,6 +20,7 @@ struct PlatformShellCommand; namespace lldb { class SBLaunchInfo; +class SBAttachInfo; class LLDB_API SBPlatformConnectOptions { public: @@ -149,6 +151,9 @@ class LLDB_API SBPlatform { SBError Launch(SBLaunchInfo &launch_info); + SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, + SBTarget &target, SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 16527bb0291fcb4..8c1c81418f83d12 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -449,6 +449,7 @@ class LLDB_API SBProcess { friend class SBExecutionContext; friend class SBFunction; friend class SBModule; + friend class SBPlatform; friend class SBTarget; friend class SBThread; friend class SBValue; diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index a0104d36df8d903..1784487323ad6be 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -196,6 +196,9 @@ def respond(self, packet): return self.vFile(packet) if packet.startswith("vRun;"): return self.vRun(packet) + if packet.startswith("qLaunchGDBServer;"): + _, host = packet.partition(";")[2].split(":") + return self.qLaunchGDBServer(host) if packet.startswith("qLaunchSuccess"): return self.qLaunchSuccess() if packet.startswith("QEnvironment:"): @@ -329,6 +332,9 @@ def vFile(self, packet): def vRun(self, packet): return "" + def qLaunchGDBServer(self, host): + raise self.UnexpectedPacketException() + def qLaunchSuccess(self): return "" diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index f8300a5bab30e41..7dfbb1373989c02 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -7,12 +7,14 @@ 
//===----------------------------------------------------------------------===// #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBDebugger.h" #include "lldb/API/SBEnvironment.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" #include "lldb/Target/Platform.h" @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, + SBError &error) { + LLDB_INSTRUMENT_VA(this, attach_info); + + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessAttachInfo &info = attach_info.ref(); + Status status; + ProcessSP process_sp = platform_sp->Attach(info, debugger.ref(), + target.GetSP().get(), status); + error.SetError(status); + return SBProcess(process_sp); + } + + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py new file mode 100644 index 000000000000000..d62e86b2a3c1d20 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -0,0 +1,58 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformAttach(GDBRemoteTestBase): + @skipIfRemote + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr52451") + def test_attach(self): + """Test attaching by name""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self, port): + MockGDBServerResponder.__init__(self) + self.port = port + + def qLaunchGDBServer(self, _): + return "pid:1337;port:{};".format(self.port) + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + class MyGDBResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + + def vAttach(self, _): + return "OK" + + self.server.responder = MyGDBResponder() + port = self.server._socket._server_socket.getsockname()[1] + + platform_socket = TCPServerSocket() + platform_server = MockGDBServer(platform_socket) + platform_server.responder = MyPlatformResponder(port) + platform_server.start() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + lldb.SBPlatformConnectOptions(platform_server.get_connect_url()) + ) + self.assertSuccess(error) + self.assertTrue(platform.IsConnected()) + + attach_info = lldb.SBAttachInfo() + attach_info.SetExecutable("foo") + + target = lldb.SBTarget() + process = platform.Attach(attach_info, self.dbg, target, error) + self.assertSuccess(error) + self.assertEqual(process.GetProcessID(), 95117) + + platform.DisconnectRemote() From lldb-commits at lists.llvm.org Mon Oct 2 16:40:53 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 16:40:53 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB 
API (PR #68050) In-Reply-To: Message-ID: <651b5505.170a0220.6c294.095f@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes Expose Platform::Attach through the SB API. rdar://116188959 --- Full diff: https://github.com/llvm/llvm-project/pull/68050.diff 7 Files Affected: - (modified) lldb/include/lldb/API/SBAttachInfo.h (+1) - (modified) lldb/include/lldb/API/SBDebugger.h (+1) - (modified) lldb/include/lldb/API/SBPlatform.h (+5) - (modified) lldb/include/lldb/API/SBProcess.h (+1) - (modified) lldb/packages/Python/lldbsuite/test/gdbclientutils.py (+6) - (modified) lldb/source/API/SBPlatform.cpp (+25) - (added) lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py (+58) ``````````diff diff --git a/lldb/include/lldb/API/SBAttachInfo.h b/lldb/include/lldb/API/SBAttachInfo.h index ea1145e625856f0..c18655fee77e0ac 100644 --- a/lldb/include/lldb/API/SBAttachInfo.h +++ b/lldb/include/lldb/API/SBAttachInfo.h @@ -197,6 +197,7 @@ class LLDB_API SBAttachInfo { protected: friend class SBTarget; + friend class SBPlatform; friend class lldb_private::ScriptInterpreter; diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 29cf2c16fad4bd7..218113a7a391f35 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -487,6 +487,7 @@ class LLDB_API SBDebugger { friend class SBProcess; friend class SBSourceManager; friend class SBStructuredData; + friend class SBPlatform; friend class SBTarget; friend class SBTrace; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index 6567277a5d161e7..614ee3202def5bc 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -10,6 +10,7 @@ #define LLDB_API_SBPLATFORM_H #include "lldb/API/SBDefines.h" +#include "lldb/API/SBProcess.h" #include @@ -19,6 +20,7 @@ struct PlatformShellCommand; namespace lldb { class SBLaunchInfo; +class SBAttachInfo; class LLDB_API SBPlatformConnectOptions { public: @@ -149,6 +151,9 @@ class LLDB_API SBPlatform { SBError Launch(SBLaunchInfo &launch_info); + SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, + SBTarget &target, SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 16527bb0291fcb4..8c1c81418f83d12 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -449,6 +449,7 @@ class LLDB_API SBProcess { friend class SBExecutionContext; friend class SBFunction; friend class SBModule; + friend class SBPlatform; friend class SBTarget; friend class SBThread; friend class SBValue; diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index a0104d36df8d903..1784487323ad6be 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -196,6 +196,9 @@ def respond(self, packet): return self.vFile(packet) if packet.startswith("vRun;"): return self.vRun(packet) + if packet.startswith("qLaunchGDBServer;"): + _, host = packet.partition(";")[2].split(":") + return self.qLaunchGDBServer(host) if packet.startswith("qLaunchSuccess"): return self.qLaunchSuccess() if packet.startswith("QEnvironment:"): @@ -329,6 +332,9 @@ def vFile(self, packet): def vRun(self, packet): return "" + def qLaunchGDBServer(self, host): + raise self.UnexpectedPacketException() + def qLaunchSuccess(self): return "" diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index f8300a5bab30e41..7dfbb1373989c02 100644 --- 
a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -7,12 +7,14 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBDebugger.h" #include "lldb/API/SBEnvironment.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" #include "lldb/Target/Platform.h" @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, + SBError &error) { + LLDB_INSTRUMENT_VA(this, attach_info); + + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessAttachInfo &info = attach_info.ref(); + Status status; + ProcessSP process_sp = platform_sp->Attach(info, debugger.ref(), + target.GetSP().get(), status); + error.SetError(status); + return SBProcess(process_sp); + } + + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py new file mode 100644 index 000000000000000..d62e86b2a3c1d20 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -0,0 +1,58 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformAttach(GDBRemoteTestBase): + @skipIfRemote + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr52451") + def test_attach(self): + """Test attaching by name""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self, port): + MockGDBServerResponder.__init__(self) + self.port = port + + def qLaunchGDBServer(self, _): + return "pid:1337;port:{};".format(self.port) + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + class MyGDBResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + + def vAttach(self, _): + return "OK" + + self.server.responder = MyGDBResponder() + port = self.server._socket._server_socket.getsockname()[1] + + platform_socket = TCPServerSocket() + platform_server = MockGDBServer(platform_socket) + platform_server.responder = MyPlatformResponder(port) + platform_server.start() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + lldb.SBPlatformConnectOptions(platform_server.get_connect_url()) + ) + self.assertSuccess(error) + self.assertTrue(platform.IsConnected()) + + attach_info = lldb.SBAttachInfo() + attach_info.SetExecutable("foo") + + target = lldb.SBTarget() + process = platform.Attach(attach_info, self.dbg, target, error) + self.assertSuccess(error) + self.assertEqual(process.GetProcessID(), 95117) + + platform.DisconnectRemote() ``````````
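The test above drives the new API through the Python bindings; for completeness, the equivalent call sequence on the C++ side of the SB API looks roughly like the sketch below. The platform name, executable name, and connect-URL handling are illustrative only — the parts taken from this patch are the `SBPlatform::Attach(SBAttachInfo &, const SBDebugger &, SBTarget &, SBError &)` signature and its "not connected" precondition.

``````````cpp
#include "lldb/API/SBAttachInfo.h"
#include "lldb/API/SBDebugger.h"
#include "lldb/API/SBPlatform.h"
#include "lldb/API/SBProcess.h"
#include "lldb/API/SBTarget.h"

// Sketch: attach to a remote process by executable name via the new
// SBPlatform::Attach. "remote-linux" and "foo" are illustrative values.
static lldb::SBProcess AttachByName(lldb::SBDebugger &debugger,
                                    const char *connect_url) {
  lldb::SBPlatform platform("remote-linux");
  debugger.SetSelectedPlatform(platform);

  lldb::SBPlatformConnectOptions options(connect_url);
  lldb::SBError error = platform.ConnectRemote(options);
  if (error.Fail()) // Attach reports "not connected" without this step.
    return lldb::SBProcess();

  lldb::SBAttachInfo attach_info;
  attach_info.SetExecutable("foo");

  lldb::SBTarget target;
  return platform.Attach(attach_info, debugger, target, error);
}
``````````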
https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 16:51:34 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 16:51:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Unifying Scripted Affordance Interfaces (PR #68052) Message-ID: https://github.com/medismailben created https://github.com/llvm/llvm-project/pull/68052 None >From 5d1aa93345835d3614caffa8dd3041d995144e7e Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 31 Aug 2023 13:13:57 +0100 Subject: [PATCH 1/6] [lldb] Move ScriptInterpreter Interfaces to subdirectory (NFC) As we're consolidating and streamlining the various scripting affordances of lldb, we keep creating new interface files. This patch groups all the current interface files into a separate sub directory called `Interfaces` both in the core `Interpreter` directory and the `ScriptInterpreter` plugin directory. Differential Revision: https://reviews.llvm.org/D158833 Signed-off-by: Med Ismail Bennani --- .../{ => Interfaces}/ScriptedInterface.h | 6 +-- .../ScriptedPlatformInterface.h | 8 +-- .../ScriptedProcessInterface.h | 38 ++------------ .../Interfaces/ScriptedThreadInterface.h | 52 +++++++++++++++++++ .../lldb/Interpreter/ScriptInterpreter.h | 9 +++- .../ScriptInterpreter/Python/CMakeLists.txt | 7 ++- .../Python/Interfaces/CMakeLists.txt | 38 ++++++++++++++ .../ScriptedPlatformPythonInterface.cpp | 6 +-- .../ScriptedPlatformPythonInterface.h | 8 +-- .../ScriptedProcessPythonInterface.cpp | 8 +-- .../ScriptedProcessPythonInterface.h | 8 +-- .../ScriptedPythonInterface.cpp | 4 +- .../ScriptedPythonInterface.h | 15 +++--- .../ScriptedThreadPythonInterface.cpp | 6 +-- .../ScriptedThreadPythonInterface.h | 8 +-- .../Python/ScriptInterpreterPython.cpp | 10 +++- .../Python/ScriptInterpreterPythonImpl.h | 1 + 17 files changed, 151 insertions(+), 81 deletions(-) rename lldb/include/lldb/Interpreter/{ => Interfaces}/ScriptedInterface.h (93%) rename lldb/include/lldb/Interpreter/{ => Interfaces}/ScriptedPlatformInterface.h (85%) rename lldb/include/lldb/Interpreter/{ => Interfaces}/ScriptedProcessInterface.h (68%) create mode 100644 lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h create mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedPlatformPythonInterface.cpp (97%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedPlatformPythonInterface.h (80%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedProcessPythonInterface.cpp (97%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedProcessPythonInterface.h (86%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedPythonInterface.cpp (98%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedPythonInterface.h (95%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedThreadPythonInterface.cpp (97%) rename lldb/source/Plugins/ScriptInterpreter/Python/{ => Interfaces}/ScriptedThreadPythonInterface.h (81%) diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h similarity index 93% rename from lldb/include/lldb/Interpreter/ScriptedInterface.h rename to lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h index ab6142593264374..948f763e95ecea4 100644 --- 
a/lldb/include/lldb/Interpreter/ScriptedInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_INTERPRETER_SCRIPTEDINTERFACE_H -#define LLDB_INTERPRETER_SCRIPTEDINTERFACE_H +#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDINTERFACE_H #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Target/ExecutionContext.h" @@ -74,4 +74,4 @@ class ScriptedInterface { StructuredData::GenericSP m_object_instance_sp; }; } // namespace lldb_private -#endif // LLDB_INTERPRETER_SCRIPTEDINTERFACE_H +#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptedPlatformInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h similarity index 85% rename from lldb/include/lldb/Interpreter/ScriptedPlatformInterface.h rename to lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h index 2db1fef0be721e6..c687cabfe0c1278 100644 --- a/lldb/include/lldb/Interpreter/ScriptedPlatformInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_INTERPRETER_SCRIPTEDPLATFORMINTERFACE_H -#define LLDB_INTERPRETER_SCRIPTEDPLATFORMINTERFACE_H +#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDPLATFORMINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDPLATFORMINTERFACE_H #include "lldb/Core/StructuredDataImpl.h" -#include "lldb/Interpreter/ScriptedInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedInterface.h" #include "lldb/lldb-private.h" @@ -46,4 +46,4 @@ class ScriptedPlatformInterface : virtual public ScriptedInterface { }; } // namespace lldb_private -#endif // LLDB_INTERPRETER_SCRIPTEDPLATFORMINTERFACE_H +#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDPLATFORMINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h similarity index 68% rename from lldb/include/lldb/Interpreter/ScriptedProcessInterface.h rename to lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h index 210b1081cddcd79..68756c4d9ac858d 100644 --- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_INTERPRETER_SCRIPTEDPROCESSINTERFACE_H -#define LLDB_INTERPRETER_SCRIPTEDPROCESSINTERFACE_H +#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDPROCESSINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDPROCESSINTERFACE_H +#include "ScriptedInterface.h" #include "lldb/Core/StructuredDataImpl.h" -#include "lldb/Interpreter/ScriptedInterface.h" #include "lldb/Target/MemoryRegionInfo.h" #include "lldb/lldb-private.h" @@ -80,36 +80,6 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return {}; } }; - -class ScriptedThreadInterface : virtual public ScriptedInterface { -public: - StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override { - return {}; - } - - virtual lldb::tid_t GetThreadID() { return LLDB_INVALID_THREAD_ID; } - - virtual std::optional GetName() { return 
std::nullopt; } - - virtual lldb::StateType GetState() { return lldb::eStateInvalid; } - - virtual std::optional GetQueue() { return std::nullopt; } - - virtual StructuredData::DictionarySP GetStopReason() { return {}; } - - virtual StructuredData::ArraySP GetStackFrames() { return {}; } - - virtual StructuredData::DictionarySP GetRegisterInfo() { return {}; } - - virtual std::optional GetRegisterContext() { - return std::nullopt; - } - - virtual StructuredData::ArraySP GetExtendedInfo() { return {}; } -}; } // namespace lldb_private -#endif // LLDB_INTERPRETER_SCRIPTEDPROCESSINTERFACE_H +#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDPROCESSINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h new file mode 100644 index 000000000000000..781df51a213229a --- /dev/null +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h @@ -0,0 +1,52 @@ +//===-- ScriptedThreadInterface.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADINTERFACE_H + +#include "ScriptedInterface.h" +#include "lldb/Core/StructuredDataImpl.h" + +#include "lldb/lldb-private.h" + +#include +#include + +namespace lldb_private { +class ScriptedThreadInterface : virtual public ScriptedInterface { +public: + StructuredData::GenericSP + CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, + StructuredData::DictionarySP args_sp, + StructuredData::Generic *script_obj = nullptr) override { + return {}; + } + + virtual lldb::tid_t GetThreadID() { return LLDB_INVALID_THREAD_ID; } + + virtual std::optional GetName() { return std::nullopt; } + + virtual lldb::StateType GetState() { return lldb::eStateInvalid; } + + virtual std::optional GetQueue() { return std::nullopt; } + + virtual StructuredData::DictionarySP GetStopReason() { return {}; } + + virtual StructuredData::ArraySP GetStackFrames() { return {}; } + + virtual StructuredData::DictionarySP GetRegisterInfo() { return {}; } + + virtual std::optional GetRegisterContext() { + return std::nullopt; + } + + virtual StructuredData::ArraySP GetExtendedInfo() { return {}; } +}; +} // namespace lldb_private + +#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 7ee251e2087502e..eacd10d5279be6f 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -21,9 +21,10 @@ #include "lldb/Core/ThreadedCommunication.h" #include "lldb/Host/PseudoTerminal.h" #include "lldb/Host/StreamFile.h" +#include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" #include "lldb/Interpreter/ScriptObject.h" -#include "lldb/Interpreter/ScriptedPlatformInterface.h" -#include "lldb/Interpreter/ScriptedProcessInterface.h" #include "lldb/Utility/Broadcaster.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/StructuredData.h" @@ -588,6 
+589,10 @@ class ScriptInterpreter : public PluginInterface { return std::make_unique(); } + virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() { + return std::make_shared(); + } + ScriptedPlatformInterface &GetScriptedPlatformInterface() { return *m_scripted_platform_interface_up; } diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt index 72361166964241a..7523d65abf0f802 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt +++ b/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt @@ -19,14 +19,12 @@ if (LLDB_ENABLE_LIBEDIT) list(APPEND LLDB_LIBEDIT_LIBS LibEdit::LibEdit) endif() +add_subdirectory(Interfaces) + add_lldb_library(lldbPluginScriptInterpreterPython PLUGIN PythonDataObjects.cpp PythonReadline.cpp ScriptInterpreterPython.cpp - ScriptedPythonInterface.cpp - ScriptedProcessPythonInterface.cpp - ScriptedThreadPythonInterface.cpp - ScriptedPlatformPythonInterface.cpp LINK_LIBS lldbBreakpoint @@ -35,6 +33,7 @@ add_lldb_library(lldbPluginScriptInterpreterPython PLUGIN lldbHost lldbInterpreter lldbTarget + lldbPluginScriptInterpreterPythonInterfaces ${Python3_LIBRARIES} ${LLDB_LIBEDIT_LIBS} diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt new file mode 100644 index 000000000000000..73aeb32ead855e8 --- /dev/null +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt @@ -0,0 +1,38 @@ +if (APPLE AND LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY) + # Work around an issue with the Python headers, which have a modular include + # inside an extern "C" block. + remove_module_flags() +endif() + +if(NOT LLDB_PYTHON_RELATIVE_PATH) + message(FATAL_ERROR "LLDB_PYTHON_RELATIVE_PATH is not set.") +endif() +add_definitions(-DLLDB_PYTHON_RELATIVE_LIBDIR="${LLDB_PYTHON_RELATIVE_PATH}") + +if(NOT LLDB_PYTHON_EXE_RELATIVE_PATH) + message(FATAL_ERROR "LLDB_PYTHON_EXE_RELATIVE_PATH is not set.") +endif() +add_definitions(-DLLDB_PYTHON_EXE_RELATIVE_PATH="${LLDB_PYTHON_EXE_RELATIVE_PATH}") + + +if (LLDB_ENABLE_LIBEDIT) + list(APPEND LLDB_LIBEDIT_LIBS LibEdit::LibEdit) +endif() + +add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces + ScriptedPythonInterface.cpp + ScriptedProcessPythonInterface.cpp + ScriptedThreadPythonInterface.cpp + ScriptedPlatformPythonInterface.cpp + + LINK_LIBS + lldbCore + lldbHost + lldbInterpreter + lldbTarget + ${Python3_LIBRARIES} + ${LLDB_LIBEDIT_LIBS} + + LINK_COMPONENTS + Support + ) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPlatformPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp similarity index 97% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPlatformPythonInterface.cpp rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp index a0c55874c70a4a0..9bed33516915d08 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPlatformPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp @@ -14,10 +14,10 @@ #if LLDB_ENABLE_PYTHON // LLDB Python header must be included first -#include "lldb-python.h" +#include "../lldb-python.h" -#include "SWIGPythonBridge.h" -#include "ScriptInterpreterPythonImpl.h" +#include "../SWIGPythonBridge.h" +#include "../ScriptInterpreterPythonImpl.h" #include 
"ScriptedPlatformPythonInterface.h" using namespace lldb; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPlatformPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h similarity index 80% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPlatformPythonInterface.h rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h index 1e3ad9962325a3f..02deecd15ede062 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPlatformPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPLATFORMPYTHONINTERFACE_H -#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPLATFORMPYTHONINTERFACE_H +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPLATFORMPYTHONINTERFACE_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPLATFORMPYTHONINTERFACE_H #include "lldb/Host/Config.h" #if LLDB_ENABLE_PYTHON #include "ScriptedPythonInterface.h" -#include "lldb/Interpreter/ScriptedPlatformInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h" namespace lldb_private { class ScriptedPlatformPythonInterface : public ScriptedPlatformInterface, @@ -41,4 +41,4 @@ class ScriptedPlatformPythonInterface : public ScriptedPlatformInterface, } // namespace lldb_private #endif // LLDB_ENABLE_PYTHON -#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPLATFORMPYTHONINTERFACE_H +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPLATFORMPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp similarity index 97% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp index 019924fa19718f0..63a4db1ff5973e7 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp @@ -9,7 +9,7 @@ #include "lldb/Host/Config.h" #if LLDB_ENABLE_PYTHON // LLDB Python header must be included first -#include "lldb-python.h" +#include "../lldb-python.h" #endif #include "lldb/Target/Process.h" #include "lldb/Utility/Log.h" @@ -18,8 +18,8 @@ #if LLDB_ENABLE_PYTHON -#include "SWIGPythonBridge.h" -#include "ScriptInterpreterPythonImpl.h" +#include "../SWIGPythonBridge.h" +#include "../ScriptInterpreterPythonImpl.h" #include "ScriptedProcessPythonInterface.h" #include "ScriptedThreadPythonInterface.h" #include @@ -199,7 +199,7 @@ ScriptedProcessPythonInterface::GetScriptedThreadPluginName() { lldb::ScriptedThreadInterfaceSP ScriptedProcessPythonInterface::CreateScriptedThreadInterface() { - return std::make_shared(m_interpreter); + return m_interpreter.CreateScriptedThreadInterface(); } StructuredData::DictionarySP ScriptedProcessPythonInterface::GetMetadata() { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h similarity index 86% rename from 
lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h index ff03eab07648a84..11330f5591b74e6 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPROCESSPYTHONINTERFACE_H -#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPROCESSPYTHONINTERFACE_H +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPROCESSPYTHONINTERFACE_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPROCESSPYTHONINTERFACE_H #include "lldb/Host/Config.h" #if LLDB_ENABLE_PYTHON #include "ScriptedPythonInterface.h" -#include "lldb/Interpreter/ScriptedProcessInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #include namespace lldb_private { @@ -68,4 +68,4 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface, } // namespace lldb_private #endif // LLDB_ENABLE_PYTHON -#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPROCESSPYTHONINTERFACE_H +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPROCESSPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp similarity index 98% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp index 1e36a81fde54e8b..6f22503b279ca62 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp @@ -13,9 +13,9 @@ #if LLDB_ENABLE_PYTHON // LLDB Python header must be included first -#include "lldb-python.h" +#include "../lldb-python.h" -#include "ScriptInterpreterPythonImpl.h" +#include "../ScriptInterpreterPythonImpl.h" #include "ScriptedPythonInterface.h" #include diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h similarity index 95% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 4d0645d18acad4a..9163b8f6aede7d1 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPYTHONINTERFACE_H -#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPYTHONINTERFACE_H +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPYTHONINTERFACE_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPYTHONINTERFACE_H #if LLDB_ENABLE_PYTHON @@ -18,12 +18,12 @@ #include #include "lldb/Host/Config.h" -#include "lldb/Interpreter/ScriptedInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedInterface.h" #include "lldb/Utility/DataBufferHeap.h" -#include "PythonDataObjects.h" -#include 
"SWIGPythonBridge.h" -#include "ScriptInterpreterPythonImpl.h" +#include "../PythonDataObjects.h" +#include "../SWIGPythonBridge.h" +#include "../ScriptInterpreterPythonImpl.h" namespace lldb_private { class ScriptInterpreterPythonImpl; @@ -146,7 +146,6 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { original_arg = ExtractValueFromPythonObject(transformed_arg, error); } - void ReverseTransform(bool &original_arg, python::PythonObject transformed_arg, Status &error) { python::PythonBoolean boolean_arg = python::PythonBoolean( @@ -254,4 +253,4 @@ ScriptedPythonInterface::ExtractValueFromPythonObject< } // namespace lldb_private #endif // LLDB_ENABLE_PYTHON -#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPYTHONINTERFACE_H +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp similarity index 97% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index 5603a1541314a56..6addcd46e045efc 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -13,10 +13,10 @@ #if LLDB_ENABLE_PYTHON // LLDB Python header must be included first -#include "lldb-python.h" +#include "../lldb-python.h" -#include "SWIGPythonBridge.h" -#include "ScriptInterpreterPythonImpl.h" +#include "../SWIGPythonBridge.h" +#include "../ScriptInterpreterPythonImpl.h" #include "ScriptedThreadPythonInterface.h" #include diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h similarity index 81% rename from lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h rename to lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h index eac4941f88145dd..b63760fd5b5713d 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDTHREADPYTHONINTERFACE_H -#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDTHREADPYTHONINTERFACE_H +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPYTHONINTERFACE_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPYTHONINTERFACE_H #include "lldb/Host/Config.h" #if LLDB_ENABLE_PYTHON #include "ScriptedPythonInterface.h" -#include "lldb/Interpreter/ScriptedProcessInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" #include namespace lldb_private { @@ -49,4 +49,4 @@ class ScriptedThreadPythonInterface : public ScriptedThreadInterface, } // namespace lldb_private #endif // LLDB_ENABLE_PYTHON -#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDTHREADPYTHONINTERFACE_H +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp 
b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 0d6ff6660acd3db..66e6ac796c9340b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -14,12 +14,13 @@ // LLDB Python header must be included first #include "lldb-python.h" +#include "Interfaces/ScriptedPlatformPythonInterface.h" +#include "Interfaces/ScriptedProcessPythonInterface.h" +#include "Interfaces/ScriptedThreadPythonInterface.h" #include "PythonDataObjects.h" #include "PythonReadline.h" #include "SWIGPythonBridge.h" #include "ScriptInterpreterPythonImpl.h" -#include "ScriptedPlatformPythonInterface.h" -#include "ScriptedProcessPythonInterface.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFrame.h" @@ -1515,6 +1516,11 @@ ScriptInterpreterPythonImpl::CreateScriptedProcessInterface() { return std::make_unique<ScriptedProcessPythonInterface>(*this); } +ScriptedThreadInterfaceSP +ScriptInterpreterPythonImpl::CreateScriptedThreadInterface() { + return std::make_shared<ScriptedThreadPythonInterface>(*this); +} + StructuredData::ObjectSP ScriptInterpreterPythonImpl::CreateStructuredDataFromScriptObject( ScriptObject obj) { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index 01db6c5203007a1..00dc1d1baa309f6 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -140,6 +140,7 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { StructuredData::DictionarySP OSPlugin_RegisterInfo(StructuredData::ObjectSP os_plugin_object_sp) override; + lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; StructuredData::ArraySP OSPlugin_ThreadsInfo(StructuredData::ObjectSP os_plugin_object_sp) override; >From f305dded1ed6ff289b98f53899a6aa41ec9369ad Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 31 Aug 2023 22:37:35 +0100 Subject: [PATCH 2/6] [lldb] Move template python files to separate directory This patch moves the template files for the various scripting affordances to a separate directory. This is preparatory work for upcoming improvements and consolidations of other scripting affordances.
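Note that the packaged import path is unchanged by the move: the templates are still installed into the `plugins` package of the lldb Python module, so existing user scripts keep working. A minimal sanity check of that assumption (the `ScriptedPlatform` class name is assumed to be what scripted_platform.py exports):

# The templates moved on disk, but they are still packaged as lldb.plugins.*,
# so plugin authors import them exactly as before.
from lldb.plugins.scripted_process import ScriptedProcess, ScriptedThread
from lldb.plugins.scripted_platform import ScriptedPlatform  # class name assumed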
Differential Revision: https://reviews.llvm.org/D159310 Signed-off-by: Med Ismail Bennani --- lldb/bindings/python/CMakeLists.txt | 6 +++--- .../{scripted_process => }/crashlog_scripted_process.py | 0 .../{scripted_process => templates}/scripted_platform.py | 0 .../{scripted_process => templates}/scripted_process.py | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename lldb/examples/python/{scripted_process => }/crashlog_scripted_process.py (100%) rename lldb/examples/python/{scripted_process => templates}/scripted_platform.py (100%) rename lldb/examples/python/{scripted_process => templates}/scripted_process.py (100%) diff --git a/lldb/bindings/python/CMakeLists.txt b/lldb/bindings/python/CMakeLists.txt index 2cc3ad1bb98b73e..c4806bda27049c6 100644 --- a/lldb/bindings/python/CMakeLists.txt +++ b/lldb/bindings/python/CMakeLists.txt @@ -103,15 +103,15 @@ function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_tar ${lldb_python_target_dir} "plugins" FILES - "${LLDB_SOURCE_DIR}/examples/python/scripted_process/scripted_process.py" - "${LLDB_SOURCE_DIR}/examples/python/scripted_process/scripted_platform.py") + "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_process.py" + "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_platform.py") if(APPLE) create_python_package( ${swig_target} ${lldb_python_target_dir} "macosx" FILES "${LLDB_SOURCE_DIR}/examples/python/crashlog.py" - "${LLDB_SOURCE_DIR}/examples/python/scripted_process/crashlog_scripted_process.py" + "${LLDB_SOURCE_DIR}/examples/python/crashlog_scripted_process.py" "${LLDB_SOURCE_DIR}/examples/darwin/heap_find/heap.py") create_python_package( diff --git a/lldb/examples/python/scripted_process/crashlog_scripted_process.py b/lldb/examples/python/crashlog_scripted_process.py similarity index 100% rename from lldb/examples/python/scripted_process/crashlog_scripted_process.py rename to lldb/examples/python/crashlog_scripted_process.py diff --git a/lldb/examples/python/scripted_process/scripted_platform.py b/lldb/examples/python/templates/scripted_platform.py similarity index 100% rename from lldb/examples/python/scripted_process/scripted_platform.py rename to lldb/examples/python/templates/scripted_platform.py diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/templates/scripted_process.py similarity index 100% rename from lldb/examples/python/scripted_process/scripted_process.py rename to lldb/examples/python/templates/scripted_process.py >From 611fcf3b05750871d924e10b91aab74aa82a6980 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 31 Aug 2023 23:05:18 +0100 Subject: [PATCH 3/6] [lldb] Introduce OperatingSystem{,Python}Interface and make use of it This patch aims to consolidate the OperatingSystem scripting affordance by introducing a stable interface that conforms to the Scripted{,Python}Interface. This unifies the way we call into Python methods from lldb while also improving its capabilities by allowing us to pass lldb_private objects as arguments.
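For reference, a rough sketch of the Python class this new interface ends up driving; the method names (get_thread_info, get_register_data, create_thread) match the Dispatch calls in OperatingSystemPythonInterface.cpp below, while the class name and the packed payload are illustrative:

import struct

class OperatingSystemPlugIn:
    def __init__(self, process):
        # With max_positional_args == 1, the SWIG wrapper now passes the
        # SBProcess extracted from the execution context.
        self.process = process

    def get_thread_info(self):
        # Called on every stop to (re)build the OS thread list.
        return [{"tid": 0x111111111, "name": "one", "queue": "queue1",
                 "state": "stopped", "stop_reason": "none"}]

    def get_register_data(self, tid):
        # A packed byte buffer holding the register values for `tid`.
        return struct.pack("2Q", 0x0, 0x0)  # illustrative payload

    def create_thread(self, tid, context):
        # Optional: lazily materialize a thread from a context address.
        return None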
Differential Revision: https://reviews.llvm.org/D159314 Signed-off-by: Med Ismail Bennani --- lldb/bindings/python/python-wrapper.swig | 17 +- lldb/bindings/python/python.swig | 1 + .../Interfaces/OperatingSystemInterface.h | 33 ++++ .../lldb/Interpreter/ScriptInterpreter.h | 11 +- lldb/include/lldb/lldb-forward.h | 3 + .../Python/OperatingSystemPython.cpp | 150 +++++++---------- .../Python/OperatingSystemPython.h | 7 +- .../Python/Interfaces/CMakeLists.txt | 1 + .../OperatingSystemPythonInterface.cpp | 77 +++++++++ .../OperatingSystemPythonInterface.h | 44 +++++ .../Python/ScriptInterpreterPython.cpp | 159 +----------------- .../Python/ScriptInterpreterPythonImpl.h | 17 +- 12 files changed, 251 insertions(+), 269 deletions(-) create mode 100644 lldb/include/lldb/Interpreter/Interfaces/OperatingSystemInterface.h create mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp create mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index cb54901e66d03c6..758fdf6c80c0029 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -263,11 +263,20 @@ PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedObjec } PythonObject result = {}; - if (arg_info.get().max_positional_args == 2) { + switch (arg_info.get().max_positional_args) { + case 1: + // FIXME: Since this is used by different scripting affordances, they can have a different number + // of arguments but also different types of arguments (i.e. SBExecutionContext vs SBProcess). + // We need a more reliable way to forward positional arguments. + result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp->GetProcessSP())); + break; + case 2: result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp), SWIGBridge::ToSWIGWrapper(args_impl)); + break; + default: + error_string.assign("wrong number of arguments in __init__, should be 1 or 2 " + "(not including self)"); + break; } return result; } diff --git a/lldb/bindings/python/python.swig b/lldb/bindings/python/python.swig index 278c0eed2bab27f..a8ead02250d9cd0 100644 --- a/lldb/bindings/python/python.swig +++ b/lldb/bindings/python/python.swig @@ -116,6 +116,7 @@ def lldb_iter(obj, getsize, getelem): %{ #include "../source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h" #include "../source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h" +#include "../include/lldb/Target/ExecutionContext.h" #include "../bindings/python/python-swigsafecast.swig" using namespace lldb_private; using namespace lldb_private::python; diff --git a/lldb/include/lldb/Interpreter/Interfaces/OperatingSystemInterface.h b/lldb/include/lldb/Interpreter/Interfaces/OperatingSystemInterface.h new file mode 100644 index 000000000000000..3c46f99f3b356fe --- /dev/null +++ b/lldb/include/lldb/Interpreter/Interfaces/OperatingSystemInterface.h @@ -0,0 +1,33 @@ +//===-- OperatingSystemInterface.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_INTERPRETER_INTERFACES_OPERATINGSYSTEMINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_OPERATINGSYSTEMINTERFACE_H + +#include "ScriptedThreadInterface.h" +#include "lldb/Core/StructuredDataImpl.h" + +#include "lldb/lldb-private.h" + +namespace lldb_private { +class OperatingSystemInterface : virtual public ScriptedThreadInterface { +public: + virtual StructuredData::DictionarySP CreateThread(lldb::tid_t tid, + lldb::addr_t context) { + return {}; + } + + virtual StructuredData::ArraySP GetThreadInfo() { return {}; } + + virtual std::optional GetRegisterContextForTID(lldb::tid_t tid) { + return std::nullopt; + } +}; +} // namespace lldb_private + +#endif // LLDB_INTERPRETER_INTERFACES_OPERATINGSYSTEMINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index eacd10d5279be6f..57a1962441e9510 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -21,6 +21,7 @@ #include "lldb/Core/ThreadedCommunication.h" #include "lldb/Host/PseudoTerminal.h" #include "lldb/Host/StreamFile.h" +#include "lldb/Interpreter/Interfaces/OperatingSystemInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" @@ -252,12 +253,6 @@ class ScriptInterpreter : public PluginInterface { return lldb::ValueObjectListSP(); } - virtual StructuredData::GenericSP - OSPlugin_CreatePluginObject(const char *class_name, - lldb::ProcessSP process_sp) { - return StructuredData::GenericSP(); - } - virtual StructuredData::DictionarySP OSPlugin_RegisterInfo(StructuredData::ObjectSP os_plugin_object_sp) { return StructuredData::DictionarySP(); @@ -593,6 +588,10 @@ class ScriptInterpreter : public PluginInterface { return std::make_shared(); } + virtual lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() { + return std::make_shared(); + } + ScriptedPlatformInterface &GetScriptedPlatformInterface() { return *m_scripted_platform_interface_up; } diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 3cd71c8a4ba3c0a..aa099d4abc3b09f 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -130,6 +130,7 @@ class ObjectContainer; class ObjectFile; class ObjectFileJITDelegate; class OperatingSystem; +class OperatingSystemInterface; class OptionGroup; class OptionGroupOptions; class OptionGroupPlatform; @@ -360,6 +361,8 @@ typedef std::shared_ptr typedef std::weak_ptr ObjectFileJITDelegateWP; typedef std::unique_ptr OperatingSystemUP; +typedef std::shared_ptr + OperatingSystemInterfaceSP; typedef std::shared_ptr OptionValueSP; typedef std::weak_ptr OptionValueWP; typedef std::shared_ptr diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp index 9560ae108f3e354..9a5dd515489231c 100644 --- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp +++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp @@ -74,47 +74,69 @@ llvm::StringRef OperatingSystemPython::GetPluginDescriptionStatic() { OperatingSystemPython::OperatingSystemPython(lldb_private::Process *process, const FileSpec 
&python_module_path) : OperatingSystem(process), m_thread_list_valobj_sp(), m_register_info_up(), - m_interpreter(nullptr), m_python_object_sp() { + m_interpreter(nullptr), m_script_object_sp() { if (!process) return; TargetSP target_sp = process->CalculateTarget(); if (!target_sp) return; m_interpreter = target_sp->GetDebugger().GetScriptInterpreter(); - if (m_interpreter) { - - std::string os_plugin_class_name( - python_module_path.GetFilename().AsCString("")); - if (!os_plugin_class_name.empty()) { - LoadScriptOptions options; - char python_module_path_cstr[PATH_MAX]; - python_module_path.GetPath(python_module_path_cstr, - sizeof(python_module_path_cstr)); - Status error; - if (m_interpreter->LoadScriptingModule(python_module_path_cstr, options, - error)) { - // Strip the ".py" extension if there is one - size_t py_extension_pos = os_plugin_class_name.rfind(".py"); - if (py_extension_pos != std::string::npos) - os_plugin_class_name.erase(py_extension_pos); - // Add ".OperatingSystemPlugIn" to the module name to get a string like - // "modulename.OperatingSystemPlugIn" - os_plugin_class_name += ".OperatingSystemPlugIn"; - StructuredData::ObjectSP object_sp = - m_interpreter->OSPlugin_CreatePluginObject( - os_plugin_class_name.c_str(), process->CalculateProcess()); - if (object_sp && object_sp->IsValid()) - m_python_object_sp = object_sp; - } - } - } + if (!m_interpreter) + return; + + std::string os_plugin_class_name( + python_module_path.GetFilename().AsCString("")); + if (os_plugin_class_name.empty()) + return; + + LoadScriptOptions options; + char python_module_path_cstr[PATH_MAX]; + python_module_path.GetPath(python_module_path_cstr, + sizeof(python_module_path_cstr)); + Status error; + if (!m_interpreter->LoadScriptingModule(python_module_path_cstr, options, + error)) + return; + + // Strip the ".py" extension if there is one + size_t py_extension_pos = os_plugin_class_name.rfind(".py"); + if (py_extension_pos != std::string::npos) + os_plugin_class_name.erase(py_extension_pos); + // Add ".OperatingSystemPlugIn" to the module name to get a string like + // "modulename.OperatingSystemPlugIn" + os_plugin_class_name += ".OperatingSystemPlugIn"; + + auto operating_system_interface = + m_interpreter->CreateOperatingSystemInterface(); + if (!operating_system_interface) + // return llvm::createStringError( + // llvm::inconvertibleErrorCode(), + // "Failed to create scripted thread interface."); + return; + + ExecutionContext exe_ctx(process); + StructuredData::GenericSP owned_script_object_sp = + operating_system_interface->CreatePluginObject(os_plugin_class_name, + exe_ctx, nullptr); + + if (!owned_script_object_sp) + // return llvm::createStringError(llvm::inconvertibleErrorCode(), + // "Failed to create script object."); + return; + if (!owned_script_object_sp->IsValid()) + // return llvm::createStringError(llvm::inconvertibleErrorCode(), + // "Created script object is invalid."); + return; + + m_script_object_sp = owned_script_object_sp; + m_operating_system_interface_sp = operating_system_interface; } OperatingSystemPython::~OperatingSystemPython() = default; DynamicRegisterInfo *OperatingSystemPython::GetDynamicRegisterInfo() { if (m_register_info_up == nullptr) { - if (!m_interpreter || !m_python_object_sp) + if (!m_interpreter || !m_operating_system_interface_sp) return nullptr; Log *log = GetLog(LLDBLog::OS); @@ -124,7 +146,7 @@ DynamicRegisterInfo *OperatingSystemPython::GetDynamicRegisterInfo() { m_process->GetID()); StructuredData::DictionarySP dictionary = - 
m_interpreter->OSPlugin_RegisterInfo(m_python_object_sp); + m_operating_system_interface_sp->GetRegisterInfo(); if (!dictionary) return nullptr; @@ -140,27 +162,11 @@ DynamicRegisterInfo *OperatingSystemPython::GetDynamicRegisterInfo() { bool OperatingSystemPython::UpdateThreadList(ThreadList &old_thread_list, ThreadList &core_thread_list, ThreadList &new_thread_list) { - if (!m_interpreter || !m_python_object_sp) + if (!m_interpreter || !m_operating_system_interface_sp) return false; Log *log = GetLog(LLDBLog::OS); - // First thing we have to do is to try to get the API lock, and the - // interpreter lock. We're going to change the thread content of the process, - // and we're going to use python, which requires the API lock to do it. We - // need the interpreter lock to make sure thread_info_dict stays alive. - // - // If someone already has the API lock, that is ok, we just want to avoid - // external code from making new API calls while this call is happening. - // - // This is a recursive lock so we can grant it to any Python code called on - // the stack below us. - Target &target = m_process->GetTarget(); - std::unique_lock api_lock(target.GetAPIMutex(), - std::defer_lock); - (void)api_lock.try_lock(); // See above. - auto interpreter_lock = m_interpreter->AcquireInterpreterLock(); - LLDB_LOGF(log, "OperatingSystemPython::UpdateThreadList() fetching thread " "data from python for pid %" PRIu64, @@ -170,7 +176,7 @@ bool OperatingSystemPython::UpdateThreadList(ThreadList &old_thread_list, // the lldb_private::Process subclass, no memory threads will be in this // list. StructuredData::ArraySP threads_list = - m_interpreter->OSPlugin_ThreadsInfo(m_python_object_sp); + m_operating_system_interface_sp->GetThreadInfo(); const uint32_t num_cores = core_thread_list.GetSize(false); @@ -281,28 +287,12 @@ RegisterContextSP OperatingSystemPython::CreateRegisterContextForThread(Thread *thread, addr_t reg_data_addr) { RegisterContextSP reg_ctx_sp; - if (!m_interpreter || !m_python_object_sp || !thread) + if (!m_interpreter || !m_script_object_sp || !thread) return reg_ctx_sp; if (!IsOperatingSystemPluginThread(thread->shared_from_this())) return reg_ctx_sp; - // First thing we have to do is to try to get the API lock, and the - // interpreter lock. We're going to change the thread content of the process, - // and we're going to use python, which requires the API lock to do it. We - // need the interpreter lock to make sure thread_info_dict stays alive. - // - // If someone already has the API lock, that is ok, we just want to avoid - // external code from making new API calls while this call is happening. - // - // This is a recursive lock so we can grant it to any Python code called on - // the stack below us. - Target &target = m_process->GetTarget(); - std::unique_lock api_lock(target.GetAPIMutex(), - std::defer_lock); - (void)api_lock.try_lock(); // See above. 
- auto interpreter_lock = m_interpreter->AcquireInterpreterLock(); - Log *log = GetLog(LLDBLog::Thread); if (reg_data_addr != LLDB_INVALID_ADDRESS) { @@ -324,11 +314,11 @@ OperatingSystemPython::CreateRegisterContextForThread(Thread *thread, ") fetching register data from python", thread->GetID(), thread->GetProtocolID()); - StructuredData::StringSP reg_context_data = - m_interpreter->OSPlugin_RegisterContextData(m_python_object_sp, - thread->GetID()); + std::optional reg_context_data = + m_operating_system_interface_sp->GetRegisterContextForTID( + thread->GetID()); if (reg_context_data) { - std::string value = std::string(reg_context_data->GetValue()); + std::string value = *reg_context_data; DataBufferSP data_sp(new DataBufferHeap(value.c_str(), value.length())); if (data_sp->GetByteSize()) { RegisterContextMemory *reg_ctx_memory = new RegisterContextMemory( @@ -347,6 +337,7 @@ OperatingSystemPython::CreateRegisterContextForThread(Thread *thread, "OperatingSystemPython::CreateRegisterContextForThread (tid " "= 0x%" PRIx64 ") forcing a dummy register context", thread->GetID()); + Target &target = m_process->GetTarget(); reg_ctx_sp = std::make_shared( *thread, 0, target.GetArchitecture().GetAddressByteSize()); } @@ -372,26 +363,11 @@ lldb::ThreadSP OperatingSystemPython::CreateThread(lldb::tid_t tid, ", context = 0x%" PRIx64 ") fetching register data from python", tid, context); - if (m_interpreter && m_python_object_sp) { - // First thing we have to do is to try to get the API lock, and the - // interpreter lock. We're going to change the thread content of the - // process, and we're going to use python, which requires the API lock to - // do it. We need the interpreter lock to make sure thread_info_dict stays - // alive. - // - // If someone already has the API lock, that is ok, we just want to avoid - // external code from making new API calls while this call is happening. - // - // This is a recursive lock so we can grant it to any Python code called on - // the stack below us. - Target &target = m_process->GetTarget(); - std::unique_lock api_lock(target.GetAPIMutex(), - std::defer_lock); - (void)api_lock.try_lock(); // See above. 
- auto interpreter_lock = m_interpreter->AcquireInterpreterLock(); + if (m_interpreter && m_script_object_sp) { StructuredData::DictionarySP thread_info_dict = - m_interpreter->OSPlugin_CreateThread(m_python_object_sp, tid, context); + m_operating_system_interface_sp->CreateThread(tid, context); + std::vector<bool> core_used_map; if (thread_info_dict) { ThreadList core_threads(m_process); diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h index 7800cf03af8e888..90973acde3ebfd0 100644 --- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h +++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h @@ -62,7 +62,7 @@ class OperatingSystemPython : public lldb_private::OperatingSystem { protected: bool IsValid() const { - return m_python_object_sp && m_python_object_sp->IsValid(); + return m_script_object_sp && m_script_object_sp->IsValid(); } lldb::ThreadSP CreateThreadFromThreadInfo( @@ -75,8 +75,9 @@ class OperatingSystemPython : public lldb_private::OperatingSystem { lldb::ValueObjectSP m_thread_list_valobj_sp; std::unique_ptr<DynamicRegisterInfo> m_register_info_up; - lldb_private::ScriptInterpreter *m_interpreter; - lldb_private::StructuredData::ObjectSP m_python_object_sp; + lldb_private::ScriptInterpreter *m_interpreter = nullptr; + lldb::OperatingSystemInterfaceSP m_operating_system_interface_sp = nullptr; + lldb_private::StructuredData::GenericSP m_script_object_sp = nullptr; }; #endif // LLDB_ENABLE_PYTHON diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt index 73aeb32ead855e8..b22abc49c92a9a9 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt @@ -20,6 +20,7 @@ if (LLDB_ENABLE_LIBEDIT) endif() add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces + OperatingSystemPythonInterface.cpp ScriptedPythonInterface.cpp ScriptedProcessPythonInterface.cpp ScriptedThreadPythonInterface.cpp diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp new file mode 100644 index 000000000000000..45804d5d019d79d --- /dev/null +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp @@ -0,0 +1,77 @@ +//===-- OperatingSystemPythonInterface.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Host/Config.h" +#include "lldb/Utility/Log.h" +#include "lldb/lldb-enumerations.h" + +#if LLDB_ENABLE_PYTHON + +// LLDB Python header must be included first +#include "../lldb-python.h" + +#include "../SWIGPythonBridge.h" +#include "../ScriptInterpreterPythonImpl.h" +#include "OperatingSystemPythonInterface.h" + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::python; +using Locker = ScriptInterpreterPythonImpl::Locker; + +OperatingSystemPythonInterface::OperatingSystemPythonInterface( + ScriptInterpreterPythonImpl &interpreter) + : OperatingSystemInterface(), ScriptedThreadPythonInterface(interpreter) {} + +StructuredData::GenericSP OperatingSystemPythonInterface::CreatePluginObject( + llvm::StringRef class_name, ExecutionContext &exe_ctx, + StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { + return ScriptedThreadPythonInterface::CreatePluginObject(class_name, exe_ctx, + args_sp, script_obj); +} + +StructuredData::DictionarySP +OperatingSystemPythonInterface::CreateThread(lldb::tid_t tid, + lldb::addr_t context) { + Status error; + StructuredData::DictionarySP dict = Dispatch( + "create_thread", error, tid, context); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + return {}; + + return dict; +} + +StructuredData::ArraySP OperatingSystemPythonInterface::GetThreadInfo() { + Status error; + StructuredData::ArraySP arr = + Dispatch("get_thread_info", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) + return {}; + + return arr; +} + +StructuredData::DictionarySP OperatingSystemPythonInterface::GetRegisterInfo() { + return ScriptedThreadPythonInterface::GetRegisterInfo(); +} + +std::optional +OperatingSystemPythonInterface::GetRegisterContextForTID(lldb::tid_t tid) { + Status error; + StructuredData::ObjectSP obj = Dispatch("get_register_data", error, tid); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + return {}; + + return obj->GetAsString()->GetValue().str(); +} + +#endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h new file mode 100644 index 000000000000000..a69557ad7c43469 --- /dev/null +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h @@ -0,0 +1,44 @@ +//===-- OperatingSystemPythonInterface.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_OPERATINGSYSTEMPYTHONINTERFACE_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_OPERATINGSYSTEMPYTHONINTERFACE_H + +#include "lldb/Host/Config.h" + +#if LLDB_ENABLE_PYTHON + +#include "ScriptedThreadPythonInterface.h" +#include "lldb/Interpreter/Interfaces/OperatingSystemInterface.h" +#include + +namespace lldb_private { +class OperatingSystemPythonInterface + : virtual public OperatingSystemInterface, + virtual public ScriptedThreadPythonInterface { +public: + OperatingSystemPythonInterface(ScriptInterpreterPythonImpl &interpreter); + + StructuredData::GenericSP + CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, + StructuredData::DictionarySP args_sp, + StructuredData::Generic *script_obj = nullptr) override; + + StructuredData::DictionarySP CreateThread(lldb::tid_t tid, + lldb::addr_t context) override; + + StructuredData::ArraySP GetThreadInfo() override; + + StructuredData::DictionarySP GetRegisterInfo() override; + + std::optional GetRegisterContextForTID(lldb::tid_t tid) override; +}; +} // namespace lldb_private + +#endif // LLDB_ENABLE_PYTHON +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_OPERATINGSYSTEMPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 66e6ac796c9340b..a57c8e4984ad8a8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -14,6 +14,7 @@ // LLDB Python header must be included first #include "lldb-python.h" +#include "Interfaces/OperatingSystemPythonInterface.h" #include "Interfaces/ScriptedPlatformPythonInterface.h" #include "Interfaces/ScriptedProcessPythonInterface.h" #include "Interfaces/ScriptedThreadPythonInterface.h" @@ -1521,6 +1522,11 @@ ScriptInterpreterPythonImpl::CreateScriptedThreadInterface() { return std::make_shared(*this); } +OperatingSystemInterfaceSP +ScriptInterpreterPythonImpl::CreateOperatingSystemInterface() { + return std::make_shared(*this); +} + StructuredData::ObjectSP ScriptInterpreterPythonImpl::CreateStructuredDataFromScriptObject( ScriptObject obj) { @@ -1532,159 +1538,6 @@ ScriptInterpreterPythonImpl::CreateStructuredDataFromScriptObject( return py_obj.CreateStructuredObject(); } -StructuredData::GenericSP -ScriptInterpreterPythonImpl::OSPlugin_CreatePluginObject( - const char *class_name, lldb::ProcessSP process_sp) { - if (class_name == nullptr || class_name[0] == '\0') - return StructuredData::GenericSP(); - - if (!process_sp) - return StructuredData::GenericSP(); - - Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); - PythonObject ret_val = SWIGBridge::LLDBSWIGPythonCreateOSPlugin( - class_name, m_dictionary_name.c_str(), process_sp); - - return StructuredData::GenericSP( - new StructuredPythonObject(std::move(ret_val))); -} - -StructuredData::DictionarySP ScriptInterpreterPythonImpl::OSPlugin_RegisterInfo( - StructuredData::ObjectSP os_plugin_object_sp) { - Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); - - if (!os_plugin_object_sp) - return {}; - - StructuredData::Generic *generic = os_plugin_object_sp->GetAsGeneric(); - if (!generic) - return {}; - - PythonObject 
implementor(PyRefType::Borrowed, - (PyObject *)generic->GetValue()); - - if (!implementor.IsAllocated()) - return {}; - - llvm::Expected expected_py_return = - implementor.CallMethod("get_register_info"); - - if (!expected_py_return) { - llvm::consumeError(expected_py_return.takeError()); - return {}; - } - - PythonObject py_return = std::move(expected_py_return.get()); - - if (py_return.get()) { - PythonDictionary result_dict(PyRefType::Borrowed, py_return.get()); - return result_dict.CreateStructuredDictionary(); - } - return StructuredData::DictionarySP(); -} - -StructuredData::ArraySP ScriptInterpreterPythonImpl::OSPlugin_ThreadsInfo( - StructuredData::ObjectSP os_plugin_object_sp) { - Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); - if (!os_plugin_object_sp) - return {}; - - StructuredData::Generic *generic = os_plugin_object_sp->GetAsGeneric(); - if (!generic) - return {}; - - PythonObject implementor(PyRefType::Borrowed, - (PyObject *)generic->GetValue()); - - if (!implementor.IsAllocated()) - return {}; - - llvm::Expected expected_py_return = - implementor.CallMethod("get_thread_info"); - - if (!expected_py_return) { - llvm::consumeError(expected_py_return.takeError()); - return {}; - } - - PythonObject py_return = std::move(expected_py_return.get()); - - if (py_return.get()) { - PythonList result_list(PyRefType::Borrowed, py_return.get()); - return result_list.CreateStructuredArray(); - } - return StructuredData::ArraySP(); -} - -StructuredData::StringSP -ScriptInterpreterPythonImpl::OSPlugin_RegisterContextData( - StructuredData::ObjectSP os_plugin_object_sp, lldb::tid_t tid) { - Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); - - if (!os_plugin_object_sp) - return {}; - - StructuredData::Generic *generic = os_plugin_object_sp->GetAsGeneric(); - if (!generic) - return {}; - PythonObject implementor(PyRefType::Borrowed, - (PyObject *)generic->GetValue()); - - if (!implementor.IsAllocated()) - return {}; - - llvm::Expected expected_py_return = - implementor.CallMethod("get_register_data", tid); - - if (!expected_py_return) { - llvm::consumeError(expected_py_return.takeError()); - return {}; - } - - PythonObject py_return = std::move(expected_py_return.get()); - - if (py_return.get()) { - PythonBytes result(PyRefType::Borrowed, py_return.get()); - return result.CreateStructuredString(); - } - return {}; -} - -StructuredData::DictionarySP ScriptInterpreterPythonImpl::OSPlugin_CreateThread( - StructuredData::ObjectSP os_plugin_object_sp, lldb::tid_t tid, - lldb::addr_t context) { - Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); - - if (!os_plugin_object_sp) - return {}; - - StructuredData::Generic *generic = os_plugin_object_sp->GetAsGeneric(); - if (!generic) - return {}; - - PythonObject implementor(PyRefType::Borrowed, - (PyObject *)generic->GetValue()); - - if (!implementor.IsAllocated()) - return {}; - - llvm::Expected expected_py_return = - implementor.CallMethod("create_thread", tid, context); - - if (!expected_py_return) { - llvm::consumeError(expected_py_return.takeError()); - return {}; - } - - PythonObject py_return = std::move(expected_py_return.get()); - - if (py_return.get()) { - PythonDictionary result_dict(PyRefType::Borrowed, py_return.get()); - return result_dict.CreateStructuredDictionary(); - } - return StructuredData::DictionarySP(); -} - StructuredData::ObjectSP ScriptInterpreterPythonImpl::CreateScriptedThreadPlan( const char *class_name, const StructuredDataImpl 
&args_data, std::string &error_str, lldb::ThreadPlanSP thread_plan_sp) { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index 00dc1d1baa309f6..a33499816d8d38c 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -134,24 +134,9 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { lldb::ScriptedProcessInterfaceUP CreateScriptedProcessInterface() override; - StructuredData::GenericSP - OSPlugin_CreatePluginObject(const char *class_name, - lldb::ProcessSP process_sp) override; - - StructuredData::DictionarySP - OSPlugin_RegisterInfo(StructuredData::ObjectSP os_plugin_object_sp) override; lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; - StructuredData::ArraySP - OSPlugin_ThreadsInfo(StructuredData::ObjectSP os_plugin_object_sp) override; - - StructuredData::StringSP - OSPlugin_RegisterContextData(StructuredData::ObjectSP os_plugin_object_sp, - lldb::tid_t thread_id) override; - - StructuredData::DictionarySP - OSPlugin_CreateThread(StructuredData::ObjectSP os_plugin_object_sp, - lldb::tid_t tid, lldb::addr_t context) override; + lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() override; StructuredData::ObjectSP LoadPluginModule(const FileSpec &file_spec, >From 9c459d5e7e5b22c30fdff221c97e803c7f9f2639 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 1 Sep 2023 16:49:01 +0100 Subject: [PATCH 4/6] [lldb] Add OperatingSystem base class to the lldb python module This patch introduces an `OperatingSystem` base implementation in the `lldb` python module to make it easier for lldb users to write their own implementation. The `OperatingSystem` base implementation itself derives from the `ScriptedThread` base implementation, since the two share common ground. To achieve that, this patch changes the `ScriptedThread` initializer, since it gets called by the `OperatingSystem` initializer. I also took the opportunity to document the `OperatingSystem` base class and its methods.
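A minimal sketch of what a user subclass of the new base class looks like, modeled on the simplified test plugin later in this patch (the register layout packed here is illustrative):

import struct

from lldb.plugins.operating_system import OperatingSystem


class OperatingSystemPlugIn(OperatingSystem):
    def __init__(self, process):
        # The base initializer validates the lldb.SBProcess and sets up
        # self.registers from the inherited register info.
        super().__init__(process)

    def get_thread_info(self):
        if not self.threads:
            self.threads = [
                {
                    "tid": 0x111111111,
                    "name": "one",
                    "queue": "queue1",
                    "state": "stopped",
                    "stop_reason": "none",
                }
            ]
        return self.threads

    def get_register_data(self, tid):
        # Pack one value per register described by get_register_info();
        # 21 general-purpose registers is an illustrative count.
        return struct.pack("21Q", *range(21))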
Differential Revision: https://reviews.llvm.org/D159315 Signed-off-by: Med Ismail Bennani --- lldb/bindings/python/CMakeLists.txt | 3 +- .../python/templates/operating_system.py | 103 ++++++++++++++++++ .../python/templates/scripted_process.py | 26 +++-- .../python_os_plugin/operating_system.py | 40 +------ 4 files changed, 124 insertions(+), 48 deletions(-) create mode 100644 lldb/examples/python/templates/operating_system.py diff --git a/lldb/bindings/python/CMakeLists.txt b/lldb/bindings/python/CMakeLists.txt index c4806bda27049c6..c941f764dfc92ac 100644 --- a/lldb/bindings/python/CMakeLists.txt +++ b/lldb/bindings/python/CMakeLists.txt @@ -104,7 +104,8 @@ function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_tar "plugins" FILES "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_process.py" - "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_platform.py") + "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_platform.py" + "${LLDB_SOURCE_DIR}/examples/python/templates/operating_system.py") if(APPLE) create_python_package( diff --git a/lldb/examples/python/templates/operating_system.py b/lldb/examples/python/templates/operating_system.py new file mode 100644 index 000000000000000..a8053bcaa21afe7 --- /dev/null +++ b/lldb/examples/python/templates/operating_system.py @@ -0,0 +1,103 @@ +from abc import abstractmethod + +import lldb +import struct + +from lldb.plugins.scripted_process import ScriptedThread + + +class OperatingSystem(ScriptedThread): + """ + Class that provides data for an instance of a LLDB 'OperatingSystemPython' plug-in class. + + ``` + thread_info = { + "tid": tid, + "name": "four", + "queue": "queue4", + "state": "stopped", + "stop_reason": "none", + "core" : 2 + } + ``` + + - tid : thread ID (mandatory) + - name : thread name (optional key/value pair) + - queue : thread dispatch queue name (optional key/value pair) + - state : thread state (mandatory, set to 'stopped' for now) + - core : the index of the core (lldb) thread that this OS Thread should shadow + - stop_reason : thread stop reason. (mandatory, usually set to 'none') + Possible values include: + - 'breakpoint': thread is stopped at a breakpoint + - 'none': thread is stopped because the process is stopped + - 'trace': thread is stopped after single stepping + The usual value for this while threads are in memory is 'none' + - register_data_addr : the address of the register data in memory (optional key/value pair) + Specifying this key/value pair for a thread will avoid a call to get_register_data() + and can be used when your registers are in a thread context structure that is contiguous + in memory. Don't specify this if your register layout in memory doesn't match the layout + described by the dictionary returned from a call to the get_register_info() method. + """ + + def __init__(self, process): + """Initialization needs a valid lldb.SBProcess object. This plug-in + will get created after a live process is valid and has stopped for the + first time. + + Args: + process (lldb.SBProcess): The process owning this thread. + """ + self.registers = None + super().__init__(process, None) + self.registers = self.register_info + self.threads = [] + + def create_thread(self, tid, context): + """Lazily create an operating system thread using a thread information + dictionary and an optional operating system thread context address. + This method is called manually, using the SBAPI + `lldb.SBProcess.CreateOSPluginThread` affordance. 
+ + Args: + tid (int): Thread ID to get `thread_info` dictionary for. + context (int): Address of the operating system thread struct. + + Returns: + Dict: The `thread_info` dictionary containing the information lldb needs + to create a Thread object and add it to the process thread list. + """ + return None + + @abstractmethod + def get_thread_info(self): + """Get the list of operating system threads. This method gets called + automatically every time the process stops and it needs to update its + thread list. + + Returns: + List[thread_info]: A list of `os_thread` dictionaries containing, + for each entry, at least the thread id, its name, queue, state and + stop reason. It can also contain a + `register_data_addr`. The list can be empty. + """ + pass + + @abstractmethod + def get_register_data(self, tid): + """Get the operating system thread register context for a given thread + id. This method is called when unwinding the stack of one of the + operating system threads. + + Args: + tid (int): Thread ID to get the register context for. + + Returns: + str: A byte buffer containing the values of all the registers for + the given thread. + """ + pass + + def get_register_context(self): + pass + + def get_stop_reason(self): + pass diff --git a/lldb/examples/python/templates/scripted_process.py b/lldb/examples/python/templates/scripted_process.py index d74ef02dec8591c..3ddcebd128eaa6e 100644 --- a/lldb/examples/python/templates/scripted_process.py +++ b/lldb/examples/python/templates/scripted_process.py @@ -244,16 +244,16 @@ class ScriptedThread(metaclass=ABCMeta): """ @abstractmethod - def __init__(self, scripted_process, args): + def __init__(self, process, args): """Construct a scripted thread. Args: - process (ScriptedProcess): The scripted process owning this thread. + process (ScriptedProcess/lldb.SBProcess): The process owning this thread. args (lldb.SBStructuredData): A Dictionary holding arbitrary key/value pairs used by the scripted thread.
""" self.target = None - self.scripted_process = None + self.originating_process = None self.process = None self.args = None self.idx = 0 @@ -268,9 +268,13 @@ def __init__(self, scripted_process, args): self.frames = [] self.extended_info = [] - if isinstance(scripted_process, ScriptedProcess): - self.target = scripted_process.target - self.scripted_process = scripted_process + if ( + isinstance(process, ScriptedProcess) + or isinstance(process, lldb.SBProcess) + and process.IsValid() + ): + self.target = process.target + self.originating_process = process self.process = self.target.GetProcess() self.get_register_info() @@ -354,14 +358,14 @@ def get_stackframes(self): def get_register_info(self): if self.register_info is None: self.register_info = dict() - if self.scripted_process.arch == "x86_64": + if self.originating_process.arch == "x86_64": self.register_info["sets"] = ["General Purpose Registers"] self.register_info["registers"] = INTEL64_GPR - elif "arm64" in self.scripted_process.arch: + elif "arm64" in self.originating_process.arch: self.register_info["sets"] = ["General Purpose Registers"] self.register_info["registers"] = ARM64_GPR else: - raise ValueError("Unknown architecture", self.scripted_process.arch) + raise ValueError("Unknown architecture", self.originating_process.arch) return self.register_info @abstractmethod @@ -505,12 +509,12 @@ def get_stop_reason(self): # TODO: Passthrough stop reason from driving process if self.driving_thread.GetStopReason() != lldb.eStopReasonNone: - if "arm64" in self.scripted_process.arch: + if "arm64" in self.originating_process.arch: stop_reason["type"] = lldb.eStopReasonException stop_reason["data"][ "desc" ] = self.driving_thread.GetStopDescription(100) - elif self.scripted_process.arch == "x86_64": + elif self.originating_process.arch == "x86_64": stop_reason["type"] = lldb.eStopReasonSignal stop_reason["data"]["signal"] = signal.SIGTRAP else: diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py index 52c678fac2efedf..f4404d78492f98d 100644 --- a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py @@ -1,29 +1,14 @@ -#!/usr/bin/env python - import lldb import struct +from lldb.plugins.operating_system import OperatingSystem + -class OperatingSystemPlugIn(object): +class OperatingSystemPlugIn(OperatingSystem): """Class that provides data for an instance of a LLDB 'OperatingSystemPython' plug-in class""" def __init__(self, process): - """Initialization needs a valid.SBProcess object. - - This plug-in will get created after a live process is valid and has stopped for the - first time.""" - self.process = None - self.registers = None - self.threads = None - if isinstance(process, lldb.SBProcess) and process.IsValid(): - self.process = process - self.threads = None # Will be an dictionary containing info for each thread - - def get_target(self): - # NOTE: Don't use "lldb.target" when trying to get your target as the "lldb.target" - # tracks the current target in the LLDB command interpreter which isn't the - # correct thing to use for this plug-in. 
- return self.process.target + super().__init__(process) def create_thread(self, tid, context): if tid == 0x444444444: @@ -40,23 +25,6 @@ def create_thread(self, tid, context): def get_thread_info(self): if not self.threads: - # The sample dictionary below shows the values that can be returned for a thread - # tid => thread ID (mandatory) - # name => thread name (optional key/value pair) - # queue => thread dispatch queue name (optional key/value pair) - # state => thred state (mandatory, set to 'stopped' for now) - # stop_reason => thread stop reason. (mandatory, usually set to 'none') - # Possible values include: - # 'breakpoint' if the thread is stopped at a breakpoint - # 'none' thread is just stopped because the process is stopped - # 'trace' the thread just single stepped - # The usual value for this while threads are in memory is 'none' - # register_data_addr => the address of the register data in memory (optional key/value pair) - # Specifying this key/value pair for a thread will avoid a call to get_register_data() - # and can be used when your registers are in a thread context structure that is contiguous - # in memory. Don't specify this if your register layout in memory doesn't match the layout - # described by the dictionary returned from a call to the - # get_register_info() method. self.threads = [ { "tid": 0x111111111, >From 0211a15706955d84776d68d2ccf7e2b0bb4c007e Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Sat, 26 Aug 2023 00:44:38 +0100 Subject: [PATCH 5/6] [lldb/docs] Add OperatingSystem documentation to the website Signed-off-by: Med Ismail Bennani --- lldb/docs/use/python.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lldb/docs/use/python.rst b/lldb/docs/use/python.rst index 6183d6935d80ef8..83e10b9df619bf6 100644 --- a/lldb/docs/use/python.rst +++ b/lldb/docs/use/python.rst @@ -11,6 +11,15 @@ some of these things by going through an example, explaining how to use Python scripting to find a bug in a program that searches for text in a large binary tree. +Operating System Thread Plugins +------------------------------- + +.. 
literalinclude:: ../../examples/python/templates/operating_system.py + :language: python + :linenos: + :encoding: utf-8 + :pyobject: OperatingSystem + The Test Program and Input -------------------------- >From 3ef039a731e09ec3767e71714fc273084c5773a8 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 25 Sep 2023 20:46:46 +0200 Subject: [PATCH 6/6] [lldb] Add new interface for ScriptedThreadPlan Signed-off-by: Med Ismail Bennani --- .../Interfaces/ScriptedInterface.h | 10 ++- .../Interfaces/ScriptedPlatformInterface.h | 8 +- .../Interfaces/ScriptedProcessInterface.h | 8 +- .../Interfaces/ScriptedThreadInterface.h | 8 +- .../Interfaces/ScriptedThreadPlanInterface.h | 42 +++++++++ .../lldb/Interpreter/ScriptInterpreter.h | 41 ++------- lldb/include/lldb/Target/ThreadPlanPython.h | 2 + lldb/include/lldb/lldb-forward.h | 3 + .../Python/OperatingSystemPython.cpp | 3 +- .../Process/scripted/ScriptedProcess.cpp | 3 +- .../Process/scripted/ScriptedThread.cpp | 4 +- .../OperatingSystemPythonInterface.cpp | 10 ++- .../OperatingSystemPythonInterface.h | 7 +- .../ScriptedPlatformPythonInterface.cpp | 25 ------ .../ScriptedPlatformPythonInterface.h | 11 +-- .../ScriptedProcessPythonInterface.cpp | 25 ------ .../ScriptedProcessPythonInterface.h | 9 +- .../Interfaces/ScriptedPythonInterface.h | 61 +++++++++++++ .../ScriptedThreadPythonInterface.cpp | 64 +++++++------- .../ScriptedThreadPythonInterface.h | 5 -- .../Python/ScriptInterpreterPython.cpp | 88 ------------------- .../Python/ScriptInterpreterPythonImpl.h | 19 ---- lldb/source/Target/ThreadPlanPython.cpp | 38 ++++---- 23 files changed, 209 insertions(+), 285 deletions(-) create mode 100644 lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h index 948f763e95ecea4..31b1087fe93098f 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h @@ -25,10 +25,12 @@ class ScriptedInterface { ScriptedInterface() = default; virtual ~ScriptedInterface() = default; - virtual StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) = 0; + template + StructuredData::GenericSP + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args && ...args) { + llvm_unreachable("Not implemented."); + } StructuredData::GenericSP GetScriptObjectInstance() { return m_object_instance_sp; diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h index c687cabfe0c1278..cf7ba42af77d02a 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h @@ -19,11 +19,11 @@ namespace lldb_private { class ScriptedPlatformInterface : virtual public ScriptedInterface { public: + template StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override { - return {}; + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args &&...args) { + llvm_unreachable("Not implemented."); } virtual StructuredData::DictionarySP ListProcesses() { 
return {}; } diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h index 68756c4d9ac858d..9aec14c92f2c3ba 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h @@ -21,11 +21,11 @@ namespace lldb_private { class ScriptedProcessInterface : virtual public ScriptedInterface { public: + template StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override { - return {}; + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args &&...args) { + llvm_unreachable("Not implemented."); } virtual StructuredData::DictionarySP GetCapabilities() { return {}; } diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h index 781df51a213229a..9c6e46cb7f59ebe 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h @@ -20,11 +20,11 @@ namespace lldb_private { class ScriptedThreadInterface : virtual public ScriptedInterface { public: + template StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override { - return {}; + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args &&...args) { + llvm_unreachable("Not implemented."); } virtual lldb::tid_t GetThreadID() { return LLDB_INVALID_THREAD_ID; } diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h new file mode 100644 index 000000000000000..46da47fd460bf0e --- /dev/null +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h @@ -0,0 +1,42 @@ +//===-- ScriptedThreadInterface.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H + +#include "ScriptedInterface.h" +#include "lldb/Core/StructuredDataImpl.h" + +#include "lldb/lldb-private.h" + +#include +#include + +namespace lldb_private { +class ScriptedThreadPlanInterface : virtual public ScriptedInterface { +public: + template + StructuredData::GenericSP + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args &&...args) { + llvm_unreachable("Not implemented."); + } + + virtual bool ExplainsStop(Event *event) { return true; } + + virtual bool ShouldStop(Event *event) { return true; } + + virtual bool IsStale() { return true; }; + + virtual lldb::StateType GetRunState() { return lldb::eStateStepping; } + + bool GetStopDescription(lldb_private::Stream *s) { return true; } +}; +} // namespace lldb_private + +#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 57a1962441e9510..cc4ff96de747c01 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -25,6 +25,7 @@ #include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" #include "lldb/Interpreter/ScriptObject.h" #include "lldb/Utility/Broadcaster.h" #include "lldb/Utility/Status.h" @@ -283,42 +284,6 @@ class ScriptInterpreter : public PluginInterface { return StructuredData::ObjectSP(); } - virtual bool - ScriptedThreadPlanExplainsStop(StructuredData::ObjectSP implementor_sp, - Event *event, bool &script_error) { - script_error = true; - return true; - } - - virtual bool - ScriptedThreadPlanShouldStop(StructuredData::ObjectSP implementor_sp, - Event *event, bool &script_error) { - script_error = true; - return true; - } - - virtual bool - ScriptedThreadPlanIsStale(StructuredData::ObjectSP implementor_sp, - bool &script_error) { - script_error = true; - return true; - } - - virtual lldb::StateType - ScriptedThreadPlanGetRunState(StructuredData::ObjectSP implementor_sp, - bool &script_error) { - script_error = true; - return lldb::eStateStepping; - } - - virtual bool - ScriptedThreadPlanGetStopDescription(StructuredData::ObjectSP implementor_sp, - lldb_private::Stream *stream, - bool &script_error) { - script_error = true; - return false; - } - virtual StructuredData::GenericSP CreateScriptedBreakpointResolver(const char *class_name, const StructuredDataImpl &args_data, @@ -587,6 +552,10 @@ class ScriptInterpreter : public PluginInterface { virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() { return std::make_shared(); } + + virtual lldb::ScriptedThreadPlanInterfaceSP CreateScriptedThreadPlanInterface() { + return std::make_shared(); + } virtual lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() { return std::make_shared(); diff --git a/lldb/include/lldb/Target/ThreadPlanPython.h b/lldb/include/lldb/Target/ThreadPlanPython.h index 64854d66b8f2589..da106faf951db1c 100644 --- a/lldb/include/lldb/Target/ThreadPlanPython.h +++ b/lldb/include/lldb/Target/ThreadPlanPython.h 
@@ -13,6 +13,7 @@ #include #include "lldb/Core/StructuredDataImpl.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -70,6 +71,7 @@ class ThreadPlanPython : public ThreadPlan { StreamString m_stop_description; // Cache the stop description here bool m_did_push; bool m_stop_others; + lldb::ScriptedThreadPlanInterfaceSP m_interface; ThreadPlanPython(const ThreadPlanPython &) = delete; const ThreadPlanPython &operator=(const ThreadPlanPython &) = delete; diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index aa099d4abc3b09f..6138e6fe5a60b45 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -185,6 +185,7 @@ class ScriptedMetadata; class ScriptedPlatformInterface; class ScriptedProcessInterface; class ScriptedThreadInterface; +class ScriptedThreadPlanInterface; class ScriptedSyntheticChildren; class SearchFilter; class Section; @@ -393,6 +394,8 @@ typedef std::unique_ptr ScriptedProcessInterfaceUP; typedef std::shared_ptr ScriptedThreadInterfaceSP; +typedef std::shared_ptr + ScriptedThreadPlanInterfaceSP; typedef std::shared_ptr SectionSP; typedef std::unique_ptr SectionListUP; typedef std::weak_ptr SectionWP; diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp index 9a5dd515489231c..28739a07de7da5b 100644 --- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp +++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp @@ -116,8 +116,7 @@ OperatingSystemPython::OperatingSystemPython(lldb_private::Process *process, ExecutionContext exe_ctx(process); StructuredData::GenericSP owned_script_object_sp = - operating_system_interface->CreatePluginObject(os_plugin_class_name, - exe_ctx, nullptr); + operating_system_interface->CreatePluginObject(os_plugin_class_name, nullptr, exe_ctx); if (!owned_script_object_sp) // return llvm::createStringError(llvm::inconvertibleErrorCode(), diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index e99a2a08bd50d8f..614c699f55d3d03 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -109,8 +109,7 @@ ScriptedProcess::ScriptedProcess(lldb::TargetSP target_sp, // Create process script object StructuredData::GenericSP object_sp = GetInterface().CreatePluginObject( - m_scripted_metadata.GetClassName(), exe_ctx, - m_scripted_metadata.GetArgsSP()); + m_scripted_metadata.GetClassName(), nullptr, exe_ctx, m_scripted_metadata.GetArgsSP()); if (!object_sp || !object_sp->IsValid()) { error.SetErrorStringWithFormat("ScriptedProcess::%s () - ERROR: %s", diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index 684375957d24760..e4af8e95a753bd8 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -58,8 +58,8 @@ ScriptedThread::Create(ScriptedProcess &process, ExecutionContext exe_ctx(process); StructuredData::GenericSP owned_script_object_sp = scripted_thread_interface->CreatePluginObject( - thread_class_name, exe_ctx, process.m_scripted_metadata.GetArgsSP(), - script_object); + thread_class_name, script_object, + exe_ctx, 
process.m_scripted_metadata.GetArgsSP()); if (!owned_script_object_sp) return llvm::createStringError(llvm::inconvertibleErrorCode(), diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp index 45804d5d019d79d..4efbbbfe40870fb 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp @@ -29,10 +29,12 @@ OperatingSystemPythonInterface::OperatingSystemPythonInterface( : OperatingSystemInterface(), ScriptedThreadPythonInterface(interpreter) {} StructuredData::GenericSP OperatingSystemPythonInterface::CreatePluginObject( - llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { - return ScriptedThreadPythonInterface::CreatePluginObject(class_name, exe_ctx, - args_sp, script_obj); + llvm::StringRef class_name, StructuredData::Generic *script_obj, + ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp) { + return ScriptedPythonInterface::CreatePluginObject(class_name, + script_obj, + exe_ctx, + args_sp); } StructuredData::DictionarySP diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h index a69557ad7c43469..87fded49f796442 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h @@ -25,9 +25,10 @@ class OperatingSystemPythonInterface OperatingSystemPythonInterface(ScriptInterpreterPythonImpl &interpreter); StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override; + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, + ExecutionContext &exe_ctx, + StructuredData::DictionarySP args_sp); StructuredData::DictionarySP CreateThread(lldb::tid_t tid, lldb::addr_t context) override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp index 9bed33516915d08..f2a65583927d4af 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp @@ -29,31 +29,6 @@ ScriptedPlatformPythonInterface::ScriptedPlatformPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedPlatformInterface(), ScriptedPythonInterface(interpreter) {} -StructuredData::GenericSP ScriptedPlatformPythonInterface::CreatePluginObject( - llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { - if (class_name.empty()) - return {}; - - StructuredDataImpl args_impl(args_sp); - std::string error_string; - - Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, - Locker::FreeLock); - - lldb::ExecutionContextRefSP exe_ctx_ref_sp = - std::make_shared(exe_ctx); - - PythonObject ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedObject( - 
class_name.str().c_str(), m_interpreter.GetDictionaryName(), - exe_ctx_ref_sp, args_impl, error_string); - - m_object_instance_sp = - StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val))); - - return m_object_instance_sp; -} - StructuredData::DictionarySP ScriptedPlatformPythonInterface::ListProcesses() { Status error; StructuredData::DictionarySP dict_sp = diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h index 02deecd15ede062..45e6c7fc1c1b979 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h @@ -21,12 +21,13 @@ class ScriptedPlatformPythonInterface : public ScriptedPlatformInterface, public ScriptedPythonInterface { public: ScriptedPlatformPythonInterface(ScriptInterpreterPythonImpl &interpreter); - + + template StructuredData::GenericSP - CreatePluginObject(const llvm::StringRef class_name, - ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override; + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args ...args) { + return ScriptedPythonInterface::CreatePluginObject(class_name, std::forward(args)...); + } StructuredData::DictionarySP ListProcesses() override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp index 63a4db1ff5973e7..90549c353bafc43 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp @@ -33,31 +33,6 @@ ScriptedProcessPythonInterface::ScriptedProcessPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedProcessInterface(), ScriptedPythonInterface(interpreter) {} -StructuredData::GenericSP ScriptedProcessPythonInterface::CreatePluginObject( - llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { - if (class_name.empty()) - return {}; - - StructuredDataImpl args_impl(args_sp); - std::string error_string; - - Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, - Locker::FreeLock); - - lldb::ExecutionContextRefSP exe_ctx_ref_sp = - std::make_shared(exe_ctx); - - PythonObject ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedObject( - class_name.str().c_str(), m_interpreter.GetDictionaryName(), - exe_ctx_ref_sp, args_impl, error_string); - - m_object_instance_sp = - StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val))); - - return m_object_instance_sp; -} - StructuredData::DictionarySP ScriptedProcessPythonInterface::GetCapabilities() { Status error; StructuredData::DictionarySP dict = diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h index 11330f5591b74e6..4555cc0b1e84b9a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h @@ -23,11 +23,12 @@ class 
ScriptedProcessPythonInterface : public ScriptedProcessInterface, public: ScriptedProcessPythonInterface(ScriptInterpreterPythonImpl &interpreter); + template StructuredData::GenericSP - CreatePluginObject(const llvm::StringRef class_name, - ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override; + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args ...args) { + return ScriptedPythonInterface::CreatePluginObject(class_name, std::forward(args)...); + } StructuredData::DictionarySP GetCapabilities() override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 9163b8f6aede7d1..706cfee32dc2370 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -30,6 +30,67 @@ class ScriptInterpreterPythonImpl; class ScriptedPythonInterface : virtual public ScriptedInterface { public: ScriptedPythonInterface(ScriptInterpreterPythonImpl &interpreter); + + template + StructuredData::GenericSP + CreatePluginObject(llvm::StringRef class_name, + StructuredData::Generic *script_obj, Args ...args) { + using namespace python; + using Locker = ScriptInterpreterPythonImpl::Locker; + + std::string error_string; + if (class_name.empty() || llvm::StringRef(m_interpreter.GetDictionaryName()).empty()) + return {}; + + Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + auto dict = PythonModule::MainModule().ResolveName(m_interpreter.GetDictionaryName()); + auto pfunc = PythonObject::ResolveNameWithDictionary(class_name, dict); + + if (!pfunc.IsAllocated()) { + error_string.append("could not find script class: "); + error_string.append(class_name); + return {}; + } + + std::tuple original_args = std::forward_as_tuple(args...); + auto transformed_args = TransformArgs(original_args); + + llvm::Expected arg_info = pfunc.GetArgInfo(); + if (!arg_info) { + llvm::handleAllErrors( + arg_info.takeError(), + [&](PythonException &E) { error_string.append(E.ReadBacktrace()); }, + [&](const llvm::ErrorInfoBase &E) { + error_string.append(E.message()); + }); + return {}; + } + + PythonObject result = {}; +// switch (arg_info.get().max_positional_args) { +// case 1: +// // FIXME: Since this is used by different scripting affordances, they can have different number +// // of argument but also different types of arguments (i.e SBExecutionContect vs SBProcess) +// // We need to have a more reliable way to forward positional arguments. 
+// result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp->GetProcessSP())); +// break; +// case 2: +// result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp), SWIGBridge::ToSWIGWrapper(args_impl)); +// break; +// default: +// error_string.assign("wrong number of arguments in __init__, should be 2 " +// "(not including self)"); +// break; +// } + + if (!result.IsValid()) + return {}; + + m_object_instance_sp = StructuredData::GenericSP(new StructuredPythonObject(std::move(result))); + return m_object_instance_sp; + } + ~ScriptedPythonInterface() override = default; protected: diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index 6addcd46e045efc..1721565b3e2f3f2 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -29,38 +29,38 @@ ScriptedThreadPythonInterface::ScriptedThreadPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedThreadInterface(), ScriptedPythonInterface(interpreter) {} -StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject( - const llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { - if (class_name.empty() && !script_obj) - return {}; - - StructuredDataImpl args_impl(args_sp); - std::string error_string; - - Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, - Locker::FreeLock); - - PythonObject ret_val; - - if (!script_obj) { - lldb::ExecutionContextRefSP exe_ctx_ref_sp = - std::make_shared(exe_ctx); - ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedObject( - class_name.str().c_str(), m_interpreter.GetDictionaryName(), - exe_ctx_ref_sp, args_impl, error_string); - } else - ret_val = PythonObject(PyRefType::Borrowed, - static_cast(script_obj->GetValue())); - - if (!ret_val) - return {}; - - m_object_instance_sp = - StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val))); - - return m_object_instance_sp; -} +//StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject( +// const llvm::StringRef class_name, ExecutionContext &exe_ctx, +// StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { +// if (class_name.empty() && !script_obj) +// return {}; +// +// StructuredDataImpl args_impl(args_sp); +// std::string error_string; +// +// Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, +// Locker::FreeLock); +// +// PythonObject ret_val; +// +// if (!script_obj) { +// lldb::ExecutionContextRefSP exe_ctx_ref_sp = +// std::make_shared(exe_ctx); +// ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedObject( +// class_name.str().c_str(), m_interpreter.GetDictionaryName(), +// exe_ctx_ref_sp, args_impl, error_string); +// } else +// ret_val = PythonObject(PyRefType::Borrowed, +// static_cast(script_obj->GetValue())); +// +// if (!ret_val) +// return {}; +// +// m_object_instance_sp = +// StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val))); +// +// return m_object_instance_sp; +//} lldb::tid_t ScriptedThreadPythonInterface::GetThreadID() { Status error; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h index 
b63760fd5b5713d..72293538c779309 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h @@ -23,11 +23,6 @@ class ScriptedThreadPythonInterface : public ScriptedThreadInterface, public: ScriptedThreadPythonInterface(ScriptInterpreterPythonImpl &interpreter); - StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp, - StructuredData::Generic *script_obj = nullptr) override; - lldb::tid_t GetThreadID() override; std::optional GetName() override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index a57c8e4984ad8a8..63ac3b41a4dce2e 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -1566,94 +1566,6 @@ StructuredData::ObjectSP ScriptInterpreterPythonImpl::CreateScriptedThreadPlan( new StructuredPythonObject(std::move(ret_val))); } -bool ScriptInterpreterPythonImpl::ScriptedThreadPlanExplainsStop( - StructuredData::ObjectSP implementor_sp, Event *event, bool &script_error) { - bool explains_stop = true; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - explains_stop = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "explains_stop", event, script_error); - if (script_error) - return true; - } - return explains_stop; -} - -bool ScriptInterpreterPythonImpl::ScriptedThreadPlanShouldStop( - StructuredData::ObjectSP implementor_sp, Event *event, bool &script_error) { - bool should_stop = true; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - should_stop = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "should_stop", event, script_error); - if (script_error) - return true; - } - return should_stop; -} - -bool ScriptInterpreterPythonImpl::ScriptedThreadPlanIsStale( - StructuredData::ObjectSP implementor_sp, bool &script_error) { - bool is_stale = true; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - is_stale = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "is_stale", (Event *)nullptr, script_error); - if (script_error) - return true; - } - return is_stale; -} - -lldb::StateType ScriptInterpreterPythonImpl::ScriptedThreadPlanGetRunState( - StructuredData::ObjectSP implementor_sp, bool &script_error) { - bool should_step = false; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - should_step = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "should_step", (Event *)nullptr, script_error); - if (script_error) - should_step = true; - } - if (should_step) - return lldb::eStateStepping; - return lldb::eStateRunning; -} - -bool 
-ScriptInterpreterPythonImpl::ScriptedThreadPlanGetStopDescription( - StructuredData::ObjectSP implementor_sp, lldb_private::Stream *stream, - bool &script_error) { - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (!generic) { - script_error = true; - return false; - } - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - return SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "stop_description", stream, script_error); -} - - StructuredData::GenericSP ScriptInterpreterPythonImpl::CreateScriptedBreakpointResolver( const char *class_name, const StructuredDataImpl &args_data, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index a33499816d8d38c..7daa602eec33c3b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -86,25 +86,6 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { StructuredData::ObjectSP CreateStructuredDataFromScriptObject(ScriptObject obj) override; - bool ScriptedThreadPlanExplainsStop(StructuredData::ObjectSP implementor_sp, - Event *event, - bool &script_error) override; - - bool ScriptedThreadPlanShouldStop(StructuredData::ObjectSP implementor_sp, - Event *event, bool &script_error) override; - - bool ScriptedThreadPlanIsStale(StructuredData::ObjectSP implementor_sp, - bool &script_error) override; - - lldb::StateType - ScriptedThreadPlanGetRunState(StructuredData::ObjectSP implementor_sp, - bool &script_error) override; - - bool - ScriptedThreadPlanGetStopDescription(StructuredData::ObjectSP implementor_sp, - lldb_private::Stream *s, - bool &script_error) override; - StructuredData::GenericSP CreateScriptedBreakpointResolver(const char *class_name, const StructuredDataImpl &args_data, diff --git a/lldb/source/Target/ThreadPlanPython.cpp b/lldb/source/Target/ThreadPlanPython.cpp index d6de6b3c3cf0497..d7e0d59ea205cd3 100644 --- a/lldb/source/Target/ThreadPlanPython.cpp +++ b/lldb/source/Target/ThreadPlanPython.cpp @@ -32,6 +32,15 @@ ThreadPlanPython::ThreadPlanPython(Thread &thread, const char *class_name, eVoteNoOpinion, eVoteNoOpinion), m_class_name(class_name), m_args_data(args_data), m_did_push(false), m_stop_others(false) { + ScriptInterpreter* interpreter = GetScriptInterpreter(); + if (!interpreter) { + SetPlanComplete(false); + // FIXME: error handling + return; + } + + + m_interface = interpreter->CreateScriptedThreadPlanInterface(); SetIsControllingPlan(true); SetOkayToDiscard(true); SetPrivate(false); @@ -80,10 +89,9 @@ bool ThreadPlanPython::ShouldStop(Event *event_ptr) { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { bool script_error; - should_stop = script_interp->ScriptedThreadPlanShouldStop( - m_implementation_sp, event_ptr, script_error); - if (script_error) - SetPlanComplete(false); + should_stop = m_interface->ShouldStop(event_ptr); +// if (script_error) +// SetPlanComplete(false); } } return should_stop; @@ -99,10 +107,9 @@ bool ThreadPlanPython::IsPlanStale() { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { bool script_error; - is_stale = script_interp->ScriptedThreadPlanIsStale(m_implementation_sp, - script_error); - if (script_error) - SetPlanComplete(false); + is_stale = m_interface->IsStale(); +// if (script_error) 
+// SetPlanComplete(false); } } return is_stale; @@ -118,10 +125,9 @@ bool ThreadPlanPython::DoPlanExplainsStop(Event *event_ptr) { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { bool script_error; - explains_stop = script_interp->ScriptedThreadPlanExplainsStop( - m_implementation_sp, event_ptr, script_error); - if (script_error) - SetPlanComplete(false); + explains_stop = m_interface->ExplainsStop(event_ptr); +// if (script_error) +// SetPlanComplete(false); } } return explains_stop; @@ -154,8 +160,7 @@ lldb::StateType ThreadPlanPython::GetPlanRunState() { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { bool script_error; - run_state = script_interp->ScriptedThreadPlanGetRunState( - m_implementation_sp, script_error); + run_state = m_interface->GetRunState(); } } return run_state; @@ -168,9 +173,8 @@ void ThreadPlanPython::GetDescription(Stream *s, lldb::DescriptionLevel level) { if (m_implementation_sp) { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { - bool script_error; - bool added_desc = script_interp->ScriptedThreadPlanGetStopDescription( - m_implementation_sp, s, script_error); + bool script_error = true; + bool added_desc = m_interface->GetStopDescription(s); if (script_error || !added_desc) s->Printf("Python thread plan implemented by class %s.", m_class_name.c_str()); From lldb-commits at lists.llvm.org Mon Oct 2 16:51:42 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 16:51:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Unifying Scripted Affordance Interfaces (wip) (PR #68052) In-Reply-To: Message-ID: <651b578e.650a0220.3ff93.00f4@mx.google.com> https://github.com/medismailben edited https://github.com/llvm/llvm-project/pull/68052 From lldb-commits at lists.llvm.org Mon Oct 2 16:55:21 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 16:55:21 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb/test] Update TestProgressReporting.py (NFC) (PR #68053) Message-ID: https://github.com/medismailben created https://github.com/llvm/llvm-project/pull/68053 None >From fddfbe53a9ab5c27da24b233cc449d0fe102a4b1 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 2 Oct 2023 16:54:45 -0700 Subject: [PATCH] [lldb/test] Update TestProgressReporting.py (NFC) Signed-off-by: Med Ismail Bennani --- .../functionalities/progress_reporting/TestProgressReporting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py index 0e72770e350366d..42d6c6e206f701e 100644 --- a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py +++ b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py @@ -11,6 +11,7 @@ class TestProgressReporting(TestBase): def setUp(self): TestBase.setUp(self) + self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( self.broadcaster, lldb.SBDebugger.eBroadcastBitProgress From lldb-commits at lists.llvm.org Mon Oct 2 17:00:04 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Mon, 02 Oct 2023 17:00:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651b5984.170a0220.8cbb7.01bb@mx.google.com> https://github.com/bulbazord edited 
https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 17:00:04 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Mon, 02 Oct 2023 17:00:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651b5984.170a0220.d02e7.01f2@mx.google.com> ================ @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, + SBError &error) { + LLDB_INSTRUMENT_VA(this, attach_info); ---------------- bulbazord wrote: This needs to instrument more things than just attach_info, right? https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 17:00:04 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Mon, 02 Oct 2023 17:00:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651b5984.170a0220.da908.0276@mx.google.com> ================ @@ -19,6 +20,7 @@ struct PlatformShellCommand; namespace lldb { class SBLaunchInfo; +class SBAttachInfo; ---------------- bulbazord wrote: sort https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 17:00:04 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Mon, 02 Oct 2023 17:00:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651b5984.170a0220.6d372.023f@mx.google.com> ================ @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, ---------------- bulbazord wrote: I'd like to see the `attach_info` parameter marked const if possible, but it looks like `SBAttachInfo::ref` is not const. Maybe we can add an overload? I think that you can do `const SBTarget &target` without any further changes because `SBTarget::GetSP` is marked `const`. https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 17:00:05 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Mon, 02 Oct 2023 17:00:05 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651b5985.170a0220.ae56.0480@mx.google.com> https://github.com/bulbazord commented: Looks fine to me, but I want to really make sure we get this interface right. Left some comments inline. https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 17:00:31 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 17:00:31 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651b599f.170a0220.26676.0247@mx.google.com> https://github.com/medismailben approved this pull request. LGTM! 
https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Mon Oct 2 17:02:26 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 17:02:26 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Unifying Scripted Affordance Interfaces (wip) (PR #68052) In-Reply-To: Message-ID: <651b5a12.170a0220.26676.025c@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff 824b1677a44e25b7c9808c774ba2d894ff14df2b 3ef039a731e09ec3767e71714fc273084c5773a8 -- lldb/include/lldb/Interpreter/Interfaces/OperatingSystemInterface.h lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h lldb/include/lldb/Interpreter/ScriptInterpreter.h lldb/include/lldb/Target/ThreadPlanPython.h lldb/include/lldb/lldb-forward.h lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp lldb/source/Plugins/Process/scripted/ScriptedThread.cpp lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h lldb/source/Target/ThreadPlanPython.cpp lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.h ``````````
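For reference, running the same tool without `--diff` applies the suggested formatting in place instead of printing it. A minimal sketch against the same base commit, trimmed to a single file (any subset of the paths above works):

```bash
# Rewrites the listed files in the working tree rather than printing a diff;
# pass whichever of the files from the command above you want cleaned up.
git-clang-format 824b1677a44e25b7c9808c774ba2d894ff14df2b -- \
    lldb/source/Target/ThreadPlanPython.cpp
```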
View the diff from clang-format here. ``````````diff diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h index 31b1087fe930..3202eae9d195 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h @@ -25,10 +25,10 @@ public: ScriptedInterface() = default; virtual ~ScriptedInterface() = default; - template + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, - StructuredData::Generic *script_obj, Args && ...args) { + StructuredData::Generic *script_obj, Args &&...args) { llvm_unreachable("Not implemented."); } diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h index cf7ba42af77d..28cf1f104fb2 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h @@ -19,7 +19,7 @@ namespace lldb_private { class ScriptedPlatformInterface : virtual public ScriptedInterface { public: - template + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, StructuredData::Generic *script_obj, Args &&...args) { diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h index 9aec14c92f2c..54350914f5da 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedProcessInterface.h @@ -21,7 +21,7 @@ namespace lldb_private { class ScriptedProcessInterface : virtual public ScriptedInterface { public: - template + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, StructuredData::Generic *script_obj, Args &&...args) { diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h index 9c6e46cb7f59..48b81d86b306 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadInterface.h @@ -20,7 +20,7 @@ namespace lldb_private { class ScriptedThreadInterface : virtual public ScriptedInterface { public: - template + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, StructuredData::Generic *script_obj, Args &&...args) { diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h index 46da47fd460b..cc565cb74a04 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h @@ -20,13 +20,13 @@ namespace lldb_private { class ScriptedThreadPlanInterface : virtual public ScriptedInterface { public: - template + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, StructuredData::Generic *script_obj, Args &&...args) { llvm_unreachable("Not implemented."); } - + virtual bool ExplainsStop(Event *event) { return true; } virtual bool ShouldStop(Event *event) { return true; } diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index cc4ff96de747..deb7168e80e9 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ 
b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -552,8 +552,9 @@ public: virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() { return std::make_shared(); } - - virtual lldb::ScriptedThreadPlanInterfaceSP CreateScriptedThreadPlanInterface() { + + virtual lldb::ScriptedThreadPlanInterfaceSP + CreateScriptedThreadPlanInterface() { return std::make_shared(); } diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp index 28739a07de7d..b67a1c0aa442 100644 --- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp +++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp @@ -116,7 +116,8 @@ OperatingSystemPython::OperatingSystemPython(lldb_private::Process *process, ExecutionContext exe_ctx(process); StructuredData::GenericSP owned_script_object_sp = - operating_system_interface->CreatePluginObject(os_plugin_class_name, nullptr, exe_ctx); + operating_system_interface->CreatePluginObject(os_plugin_class_name, + nullptr, exe_ctx); if (!owned_script_object_sp) // return llvm::createStringError(llvm::inconvertibleErrorCode(), diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index 614c699f55d3..9e71f35aa7ff 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -109,7 +109,8 @@ ScriptedProcess::ScriptedProcess(lldb::TargetSP target_sp, // Create process script object StructuredData::GenericSP object_sp = GetInterface().CreatePluginObject( - m_scripted_metadata.GetClassName(), nullptr, exe_ctx, m_scripted_metadata.GetArgsSP()); + m_scripted_metadata.GetClassName(), nullptr, exe_ctx, + m_scripted_metadata.GetArgsSP()); if (!object_sp || !object_sp->IsValid()) { error.SetErrorStringWithFormat("ScriptedProcess::%s () - ERROR: %s", diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index e4af8e95a753..c81f1700c881 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -58,8 +58,8 @@ ScriptedThread::Create(ScriptedProcess &process, ExecutionContext exe_ctx(process); StructuredData::GenericSP owned_script_object_sp = scripted_thread_interface->CreatePluginObject( - thread_class_name, script_object, - exe_ctx, process.m_scripted_metadata.GetArgsSP()); + thread_class_name, script_object, exe_ctx, + process.m_scripted_metadata.GetArgsSP()); if (!owned_script_object_sp) return llvm::createStringError(llvm::inconvertibleErrorCode(), diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp index 4efbbbfe4087..37052014df67 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.cpp @@ -31,10 +31,8 @@ OperatingSystemPythonInterface::OperatingSystemPythonInterface( StructuredData::GenericSP OperatingSystemPythonInterface::CreatePluginObject( llvm::StringRef class_name, StructuredData::Generic *script_obj, ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp) { - return ScriptedPythonInterface::CreatePluginObject(class_name, - script_obj, - exe_ctx, - args_sp); + 
return ScriptedPythonInterface::CreatePluginObject(class_name, script_obj, + exe_ctx, args_sp); } StructuredData::DictionarySP diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h index 87fded49f796..1e2560865c45 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/OperatingSystemPythonInterface.h @@ -24,11 +24,9 @@ class OperatingSystemPythonInterface public: OperatingSystemPythonInterface(ScriptInterpreterPythonImpl &interpreter); - StructuredData::GenericSP - CreatePluginObject(llvm::StringRef class_name, - StructuredData::Generic *script_obj, - ExecutionContext &exe_ctx, - StructuredData::DictionarySP args_sp); + StructuredData::GenericSP CreatePluginObject( + llvm::StringRef class_name, StructuredData::Generic *script_obj, + ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp); StructuredData::DictionarySP CreateThread(lldb::tid_t tid, lldb::addr_t context) override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h index 45e6c7fc1c1b..e51a695a12bf 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.h @@ -21,12 +21,13 @@ class ScriptedPlatformPythonInterface : public ScriptedPlatformInterface, public ScriptedPythonInterface { public: ScriptedPlatformPythonInterface(ScriptInterpreterPythonImpl &interpreter); - - template + + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, - StructuredData::Generic *script_obj, Args ...args) { - return ScriptedPythonInterface::CreatePluginObject(class_name, std::forward(args)...); + StructuredData::Generic *script_obj, Args... args) { + return ScriptedPythonInterface::CreatePluginObject( + class_name, std::forward(args)...); } StructuredData::DictionarySP ListProcesses() override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h index 4555cc0b1e84..9bb325f4af20 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.h @@ -23,11 +23,12 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface, public: ScriptedProcessPythonInterface(ScriptInterpreterPythonImpl &interpreter); - template + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, - StructuredData::Generic *script_obj, Args ...args) { - return ScriptedPythonInterface::CreatePluginObject(class_name, std::forward(args)...); + StructuredData::Generic *script_obj, Args... 
args) { + return ScriptedPythonInterface::CreatePluginObject( + class_name, std::forward(args)...); } StructuredData::DictionarySP GetCapabilities() override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 706cfee32dc2..9326dced119c 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -30,29 +30,34 @@ class ScriptInterpreterPythonImpl; class ScriptedPythonInterface : virtual public ScriptedInterface { public: ScriptedPythonInterface(ScriptInterpreterPythonImpl &interpreter); - - template + + template StructuredData::GenericSP CreatePluginObject(llvm::StringRef class_name, - StructuredData::Generic *script_obj, Args ...args) { + StructuredData::Generic *script_obj, Args... args) { using namespace python; using Locker = ScriptInterpreterPythonImpl::Locker; - + std::string error_string; - if (class_name.empty() || llvm::StringRef(m_interpreter.GetDictionaryName()).empty()) + if (class_name.empty() || + llvm::StringRef(m_interpreter.GetDictionaryName()).empty()) return {}; Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); - auto dict = PythonModule::MainModule().ResolveName(m_interpreter.GetDictionaryName()); - auto pfunc = PythonObject::ResolveNameWithDictionary(class_name, dict); + auto dict = + PythonModule::MainModule().ResolveName( + m_interpreter.GetDictionaryName()); + auto pfunc = + PythonObject::ResolveNameWithDictionary( + class_name, dict); if (!pfunc.IsAllocated()) { error_string.append("could not find script class: "); error_string.append(class_name); return {}; } - + std::tuple original_args = std::forward_as_tuple(args...); auto transformed_args = TransformArgs(original_args); @@ -60,7 +65,7 @@ public: if (!arg_info) { llvm::handleAllErrors( arg_info.takeError(), - [&](PythonException &E) { error_string.append(E.ReadBacktrace()); }, + [&](PythonException &E) { error_string.append(E.ReadBacktrace()); }, [&](const llvm::ErrorInfoBase &E) { error_string.append(E.message()); }); @@ -68,29 +73,34 @@ public: } PythonObject result = {}; -// switch (arg_info.get().max_positional_args) { -// case 1: -// // FIXME: Since this is used by different scripting affordances, they can have different number -// // of argument but also different types of arguments (i.e SBExecutionContect vs SBProcess) -// // We need to have a more reliable way to forward positional arguments. -// result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp->GetProcessSP())); -// break; -// case 2: -// result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp), SWIGBridge::ToSWIGWrapper(args_impl)); -// break; -// default: -// error_string.assign("wrong number of arguments in __init__, should be 2 " -// "(not including self)"); -// break; -// } - + // switch (arg_info.get().max_positional_args) { + // case 1: + // // FIXME: Since this is used by different scripting affordances, + // they can have different number + // // of argument but also different types of arguments (i.e + // SBExecutionContect vs SBProcess) + // // We need to have a more reliable way to forward positional + // arguments. 
result = + // pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp->GetProcessSP())); + // break; + // case 2: + // result = pfunc(SWIGBridge::ToSWIGWrapper(exe_ctx_sp), + // SWIGBridge::ToSWIGWrapper(args_impl)); break; + // default: + // error_string.assign("wrong number of arguments in __init__, should + // be 2 " + // "(not including self)"); + // break; + // } + if (!result.IsValid()) return {}; - m_object_instance_sp = StructuredData::GenericSP(new StructuredPythonObject(std::move(result))); + m_object_instance_sp = StructuredData::GenericSP( + new StructuredPythonObject(std::move(result))); return m_object_instance_sp; } - + ~ScriptedPythonInterface() override = default; protected: diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index 1721565b3e2f..13f06863075d 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -29,38 +29,40 @@ ScriptedThreadPythonInterface::ScriptedThreadPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedThreadInterface(), ScriptedPythonInterface(interpreter) {} -//StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject( -// const llvm::StringRef class_name, ExecutionContext &exe_ctx, -// StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) { -// if (class_name.empty() && !script_obj) -// return {}; +// StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject( +// const llvm::StringRef class_name, ExecutionContext &exe_ctx, +// StructuredData::DictionarySP args_sp, StructuredData::Generic +// *script_obj) { +// if (class_name.empty() && !script_obj) +// return {}; // -// StructuredDataImpl args_impl(args_sp); -// std::string error_string; +// StructuredDataImpl args_impl(args_sp); +// std::string error_string; // -// Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, -// Locker::FreeLock); +// Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, +// Locker::FreeLock); // -// PythonObject ret_val; +// PythonObject ret_val; // -// if (!script_obj) { -// lldb::ExecutionContextRefSP exe_ctx_ref_sp = -// std::make_shared(exe_ctx); -// ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedObject( -// class_name.str().c_str(), m_interpreter.GetDictionaryName(), -// exe_ctx_ref_sp, args_impl, error_string); -// } else -// ret_val = PythonObject(PyRefType::Borrowed, -// static_cast(script_obj->GetValue())); +// if (!script_obj) { +// lldb::ExecutionContextRefSP exe_ctx_ref_sp = +// std::make_shared(exe_ctx); +// ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedObject( +// class_name.str().c_str(), m_interpreter.GetDictionaryName(), +// exe_ctx_ref_sp, args_impl, error_string); +// } else +// ret_val = PythonObject(PyRefType::Borrowed, +// static_cast(script_obj->GetValue())); // -// if (!ret_val) -// return {}; +// if (!ret_val) +// return {}; // -// m_object_instance_sp = -// StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val))); +// m_object_instance_sp = +// StructuredData::GenericSP(new +// StructuredPythonObject(std::move(ret_val))); // -// return m_object_instance_sp; -//} +// return m_object_instance_sp; +// } lldb::tid_t ScriptedThreadPythonInterface::GetThreadID() { Status error; diff --git a/lldb/source/Target/ThreadPlanPython.cpp 
b/lldb/source/Target/ThreadPlanPython.cpp index d7e0d59ea205..ca1ea9bbcb90 100644 --- a/lldb/source/Target/ThreadPlanPython.cpp +++ b/lldb/source/Target/ThreadPlanPython.cpp @@ -32,14 +32,13 @@ ThreadPlanPython::ThreadPlanPython(Thread &thread, const char *class_name, eVoteNoOpinion, eVoteNoOpinion), m_class_name(class_name), m_args_data(args_data), m_did_push(false), m_stop_others(false) { - ScriptInterpreter* interpreter = GetScriptInterpreter(); + ScriptInterpreter *interpreter = GetScriptInterpreter(); if (!interpreter) { SetPlanComplete(false); // FIXME: error handling return; } - m_interface = interpreter->CreateScriptedThreadPlanInterface(); SetIsControllingPlan(true); SetOkayToDiscard(true); @@ -90,8 +89,8 @@ bool ThreadPlanPython::ShouldStop(Event *event_ptr) { if (script_interp) { bool script_error; should_stop = m_interface->ShouldStop(event_ptr); -// if (script_error) -// SetPlanComplete(false); + // if (script_error) + // SetPlanComplete(false); } } return should_stop; @@ -108,8 +107,8 @@ bool ThreadPlanPython::IsPlanStale() { if (script_interp) { bool script_error; is_stale = m_interface->IsStale(); -// if (script_error) -// SetPlanComplete(false); + // if (script_error) + // SetPlanComplete(false); } } return is_stale; @@ -126,8 +125,8 @@ bool ThreadPlanPython::DoPlanExplainsStop(Event *event_ptr) { if (script_interp) { bool script_error; explains_stop = m_interface->ExplainsStop(event_ptr); -// if (script_error) -// SetPlanComplete(false); + // if (script_error) + // SetPlanComplete(false); } } return explains_stop; ``````````
https://github.com/llvm/llvm-project/pull/68052 From lldb-commits at lists.llvm.org Mon Oct 2 17:41:17 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 17:41:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b632d.170a0220.d7588.0494@mx.google.com> ================ @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. +""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) ---------------- jeffreytan81 wrote: Good point. Will remove. https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 17:41:17 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 17:41:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b632d.a70a0220.f0d19.07d3@mx.google.com> ================ @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. +""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) + + (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp", False) + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + self.assertEqual(3 + 4, 7) ---------------- jeffreytan81 wrote: lol, this is an old diff that I drafted several months ago so totally forgot why I added this in the first place :-) https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 17:45:42 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 17:45:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b6436.050a0220.43584.063e@mx.google.com> https://github.com/jeffreytan81 updated https://github.com/llvm/llvm-project/pull/68012 >From e7e47a211ebaaa0f6380810b6573fadde12ca02d Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 10:53:17 -0700 Subject: [PATCH 1/4] Implement data formatters for LibStdC++ std::variant --- lldb/examples/synthetic/gnu_libstdcpp.py | 89 +++++++++++++++++++ .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 18 +++- .../libstdcpp/variant/Makefile | 5 ++ .../TestDataFormatterLibStdcxxVariant.py | 72 +++++++++++++++ .../libstdcpp/variant/main.cpp | 79 ++++++++++++++++ 5 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py create mode 100644 
lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 825b7f3787a010d..7462db744674682 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -892,3 +892,92 @@ def update(self): except: pass return False + + +def VariantSummaryProvider(valobj, dict): + raw_obj = valobj.GetNonSyntheticValue() + index_obj = raw_obj.GetChildMemberWithName("_M_index") + data_obj = raw_obj.GetChildMemberWithName("_M_u") + if not ( + index_obj + and index_obj.IsValid() + and data_obj + and data_obj.IsValid() + ): + return "" + + def get_variant_npos_value(index_byte_size): + if index_byte_size == 1: + return 0xFF + elif index_byte_size == 2: + return 0xFFFF + else: + return 0xFFFFFFFF + + npos_value = get_variant_npos_value(index_obj.GetByteSize()) + index = index_obj.GetValueAsUnsigned(0) + if index == npos_value: + return " No Value" + + active_type = data_obj.GetType().GetTemplateArgumentType(index) + return f" Active Type = {active_type.GetDisplayTypeName()} " + + +class VariantSynthProvider: + def __init__(self, valobj, dict): + self.raw_obj = valobj.GetNonSyntheticValue() + self.is_valid = False + self.index = None + self.data_obj = None + + def update(self): + try: + self.index = self.raw_obj.GetChildMemberWithName( + "_M_index" + ).GetValueAsSigned(-1) + self.is_valid = self.index != -1 + self.data_obj = self.raw_obj.GetChildMemberWithName("_M_u") + except: + self.is_valid = False + return False + + def has_children(self): + return True + + def num_children(self): + return 1 if self.is_valid else 0 + + def get_child_index(self, name): + return 0 + + def get_child_at_index(self, index): + if not self.is_valid: + return None + cur = 0 + node = self.data_obj + while cur < self.index: + node = node.GetChildMemberWithName("_M_rest") + cur += 1 + + # _M_storage's type depends on variant field's type "_Type". + # 1. if '_Type' is literal type: _Type _M_storage. + # 2. otherwise, __gnu_cxx::__aligned_membuf<_Type> _M_storage. + # + # For 2. we have to cast it to underlying template _Type. + + value = node.GetChildMemberWithName("_M_first").GetChildMemberWithName( + "_M_storage" + ) + template_type = value.GetType().GetTemplateArgumentType(0) + + # Literal type will return None for GetTemplateArgumentType(0) + if ( + template_type + and "__gnu_cxx::__aligned_membuf" in value.GetType().GetDisplayTypeName() + and template_type.IsValid() + ): + value = value.Cast(template_type) + + if value.IsValid(): + return value.Clone("Value") + return None diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c1743a5e0a418dd..d8a30729b6d02e1 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -332,11 +332,11 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { // If we can't parse the incoming name, then just check that it contains path. 
if (m_parse_error) return m_full.GetStringRef().contains(path); - + llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), context, identifier); @@ -372,7 +372,7 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { return false; if (haystack.empty() || !isalnum(haystack.back())) return true; - + return false; } @@ -388,7 +388,7 @@ bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { return true; } -bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, +bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); @@ -1104,6 +1104,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + cpp_category_sp->AddTypeSynthetic( + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); @@ -1148,6 +1153,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP(new ScriptSummaryFormat( stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); + cpp_category_sp->AddTypeSummary( + "^std::variant<.+>$", eFormatterMatchRegex, + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile new file mode 100644 index 000000000000000..104f82809c7a35b --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBSTDCPP := 1 +CXXFLAGS_EXTRAS := -std=c++17 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py new file mode 100644 index 000000000000000..88be87a5469e196 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -0,0 +1,72 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. 
+""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +USE_LIBSTDCPP = "USE_LIBSTDCPP" + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build(dictionary={USE_LIBSTDCPP: "1"}) + + (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp", False) + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + self.assertEqual(3 + 4, 7) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = int {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v1_ref", + substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v_v1", + substrs=[ + "v_v1 = Active Type = std::variant {", + "Value = Active Type = int {", + "Value = 12", + "}", + "}", + ], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = double {", "Value = 2", "}"], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v2", + substrs=["v2 = Active Type = double {", "Value = 2", "}"], + ) + + self.expect( + "frame variable v3", + substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], + ) + + self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) + + self.expect( + "frame variable v_many_types_no_value", + substrs=["v_many_types_no_value = No Value"], + ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp new file mode 100644 index 000000000000000..545318f9358b673 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include + +struct S { + operator int() { throw 42; } +}; + +int main() { + bool has_variant = true; + + printf("%d\n", has_variant); // break here + + std::variant v1; + std::variant &v1_ref = v1; + std::variant v2; + std::variant v3; + std::variant> v_v1; + std::variant v_no_value; + // The next variant has many types, meaning the type index does not fit in + // a byte and must be `unsigned short` instead of `unsigned char` when + // using the unstable libc++ ABI. With stable libc++ ABI, the type index + // is always just `unsigned int`. 
+ std::variant< + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int> + v_many_types_no_value; + + v1 = 12; // v contains int + v_v1 = v1; + int i = std::get<int>(v1); + printf("%d\n", i); // break here + + v2 = 2.0; + double d = std::get<double>(v2); + printf("%f\n", d); + + v3 = 'A'; + char c = std::get<char>(v3); + printf("%d\n", c); + + // Checking v1 above and here to make sure we don't maintain the incorrect + // state when we change its value. + v1 = 2.0; + d = std::get<double>(v1); + printf("%f\n", d); // break here + + try { + v_no_value.emplace<0>(S()); + } catch (...) { + } + + printf("%zu\n", v_no_value.index()); + + try { + v_many_types_no_value.emplace<0>(S()); + } catch (...)
{ + } + + printf("%zu\n", v_many_types_no_value.index()); + + return 0; // break here +} >From 53d20815eadd368ff1185af520bcb4aa75c9e96e Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 14:18:14 -0700 Subject: [PATCH 2/4] Fix formatter --- lldb/examples/synthetic/gnu_libstdcpp.py | 7 +------ .../Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 7462db744674682..29c926167fb440c 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -898,12 +898,7 @@ def VariantSummaryProvider(valobj, dict): raw_obj = valobj.GetNonSyntheticValue() index_obj = raw_obj.GetChildMemberWithName("_M_index") data_obj = raw_obj.GetChildMemberWithName("_M_u") - if not ( - index_obj - and index_obj.IsValid() - and data_obj - and data_obj.IsValid() - ): + if not (index_obj and index_obj.IsValid() and data_obj and data_obj.IsValid()): return "" def get_variant_npos_value(index_byte_size): diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index d8a30729b6d02e1..a285864ca2e1229 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -1105,10 +1105,10 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); cpp_category_sp->AddTypeSynthetic( - "^std::variant<.+>$", eFormatterMatchRegex, - SyntheticChildrenSP(new ScriptedSyntheticChildren( - stl_synth_flags, - "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); >From eebb06c9b893798a1f2b22502bf32dad48b644eb Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 14:31:36 -0700 Subject: [PATCH 3/4] Fix formatter again --- .../source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index a285864ca2e1229..ad6d627938c0520 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -336,10 +336,8 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success - = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), - context, - identifier); + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier( + path_str.c_str(), context, identifier); if (!success) return m_full.GetStringRef().contains(path); >From a5e34979045b5c612687f8b11367a863a12e5eef Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 2 Oct 2023 17:45:05 -0700 Subject: [PATCH 4/4] Remove unnecessary test code --- .../libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py index 88be87a5469e196..7a433fea5feca23 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -8,21 +8,18 @@ from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil -USE_LIBSTDCPP = "USE_LIBSTDCPP" - class LibStdcxxVariantDataFormatterTestCase(TestBase): @add_test_categories(["libstdcxx"]) def test_with_run_command(self): """Test LibStdC++ std::variant data formatter works correctly.""" - self.build(dictionary={USE_LIBSTDCPP: "1"}) + self.build() (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( self, "// break here", lldb.SBFileSpec("main.cpp", False) ) lldbutil.continue_to_breakpoint(self.process, bkpt) - self.assertEqual(3 + 4, 7) self.expect( "frame variable v1", From lldb-commits at lists.llvm.org Mon Oct 2 17:46:45 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 17:46:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] 1ec4330 - Implement data formatters for LibStdC++ std::variant (#68012) Message-ID: <651b6475.170a0220.c9f52.056e@mx.google.com> Author: jeffreytan81 Date: 2023-10-02T17:46:41-07:00 New Revision: 1ec4330f7e4d9b1ed1880c957e9e897f4eaf204f URL: https://github.com/llvm/llvm-project/commit/1ec4330f7e4d9b1ed1880c957e9e897f4eaf204f DIFF: https://github.com/llvm/llvm-project/commit/1ec4330f7e4d9b1ed1880c957e9e897f4eaf204f.diff LOG: Implement data formatters for LibStdC++ std::variant (#68012) This patch implements the data formatters for LibStdC++ `std::variant`. 
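A rough sketch of what these formatters produce in an lldb session, inferred from the test expectations below (the exact type annotation and whitespace are assumptions, not verified output):

```
(lldb) frame variable v1
(std::variant<int, double, char>) v1 =  Active Type = int  {
  Value = 12
}
(lldb) frame variable v_no_value
(std::variant<...>) v_no_value =  No Value
```

The summary string (" Active Type = ... " or " No Value") comes from VariantSummaryProvider, and the single "Value" child is synthesized by VariantSynthProvider.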
--------- Co-authored-by: jeffreytan81 Added: lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp Modified: lldb/examples/synthetic/gnu_libstdcpp.py lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp Removed: ################################################################################ diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index 825b7f3787a010d..29c926167fb440c 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -892,3 +892,87 @@ def update(self): except: pass return False + + +def VariantSummaryProvider(valobj, dict): + raw_obj = valobj.GetNonSyntheticValue() + index_obj = raw_obj.GetChildMemberWithName("_M_index") + data_obj = raw_obj.GetChildMemberWithName("_M_u") + if not (index_obj and index_obj.IsValid() and data_obj and data_obj.IsValid()): + return "" + + def get_variant_npos_value(index_byte_size): + if index_byte_size == 1: + return 0xFF + elif index_byte_size == 2: + return 0xFFFF + else: + return 0xFFFFFFFF + + npos_value = get_variant_npos_value(index_obj.GetByteSize()) + index = index_obj.GetValueAsUnsigned(0) + if index == npos_value: + return " No Value" + + active_type = data_obj.GetType().GetTemplateArgumentType(index) + return f" Active Type = {active_type.GetDisplayTypeName()} " + + +class VariantSynthProvider: + def __init__(self, valobj, dict): + self.raw_obj = valobj.GetNonSyntheticValue() + self.is_valid = False + self.index = None + self.data_obj = None + + def update(self): + try: + self.index = self.raw_obj.GetChildMemberWithName( + "_M_index" + ).GetValueAsSigned(-1) + self.is_valid = self.index != -1 + self.data_obj = self.raw_obj.GetChildMemberWithName("_M_u") + except: + self.is_valid = False + return False + + def has_children(self): + return True + + def num_children(self): + return 1 if self.is_valid else 0 + + def get_child_index(self, name): + return 0 + + def get_child_at_index(self, index): + if not self.is_valid: + return None + cur = 0 + node = self.data_obj + while cur < self.index: + node = node.GetChildMemberWithName("_M_rest") + cur += 1 + + # _M_storage's type depends on variant field's type "_Type". + # 1. if '_Type' is literal type: _Type _M_storage. + # 2. otherwise, __gnu_cxx::__aligned_membuf<_Type> _M_storage. + # + # For 2. we have to cast it to underlying template _Type. 
+ + value = node.GetChildMemberWithName("_M_first").GetChildMemberWithName( + "_M_storage" + ) + template_type = value.GetType().GetTemplateArgumentType(0) + + # Literal type will return None for GetTemplateArgumentType(0) + if ( + template_type + and "__gnu_cxx::__aligned_membuf" in value.GetType().GetDisplayTypeName() + and template_type.IsValid() + ): + value = value.Cast(template_type) + + if value.IsValid(): + return value.Clone("Value") + return None diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c1743a5e0a418dd..ad6d627938c0520 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -332,14 +332,12 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { // If we can't parse the incoming name, then just check that it contains path. if (m_parse_error) return m_full.GetStringRef().contains(path); - + llvm::StringRef identifier; llvm::StringRef context; std::string path_str = path.str(); - bool success - = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(), - context, - identifier); + bool success = CPlusPlusLanguage::ExtractContextAndIdentifier( + path_str.c_str(), context, identifier); if (!success) return m_full.GetStringRef().contains(path); @@ -372,7 +370,7 @@ bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { return false; if (haystack.empty() || !isalnum(haystack.back())) return true; - + return false; } @@ -388,7 +386,7 @@ bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { return true; } -bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, +bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); @@ -1104,6 +1102,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + cpp_category_sp->AddTypeSynthetic( + "^std::variant<.+>$", eFormatterMatchRegex, + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSynthProvider"))); stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); @@ -1148,6 +1151,11 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP(new ScriptSummaryFormat( stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); + cpp_category_sp->AddTypeSummary( + "^std::variant<.+>$", eFormatterMatchRegex, + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.VariantSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile new file mode 100644 index 000000000000000..104f82809c7a35b --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBSTDCPP := 1 +CXXFLAGS_EXTRAS := -std=c++17 +include Makefile.rules diff --git 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py new file mode 100644 index 000000000000000..7a433fea5feca23 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -0,0 +1,69 @@ +""" +Test lldb data formatter for LibStdC++ std::variant. +""" + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibStdcxxVariantDataFormatterTestCase(TestBase): + @add_test_categories(["libstdcxx"]) + def test_with_run_command(self): + """Test LibStdC++ std::variant data formatter works correctly.""" + self.build() + + (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp", False) + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = int {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v1_ref", + substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"], + ) + + self.expect( + "frame variable v_v1", + substrs=[ + "v_v1 = Active Type = std::variant {", + "Value = Active Type = int {", + "Value = 12", + "}", + "}", + ], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v1", + substrs=["v1 = Active Type = double {", "Value = 2", "}"], + ) + + lldbutil.continue_to_breakpoint(self.process, bkpt) + + self.expect( + "frame variable v2", + substrs=["v2 = Active Type = double {", "Value = 2", "}"], + ) + + self.expect( + "frame variable v3", + substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], + ) + + self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) + + self.expect( + "frame variable v_many_types_no_value", + substrs=["v_many_types_no_value = No Value"], + ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp new file mode 100644 index 000000000000000..545318f9358b673 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include + +struct S { + operator int() { throw 42; } +}; + +int main() { + bool has_variant = true; + + printf("%d\n", has_variant); // break here + + std::variant v1; + std::variant &v1_ref = v1; + std::variant v2; + std::variant v3; + std::variant> v_v1; + std::variant v_no_value; + // The next variant has many types, meaning the type index does not fit in + // a byte and must be `unsigned short` instead of `unsigned char` when + // using the unstable libc++ ABI. With stable libc++ ABI, the type index + // is always just `unsigned int`. 
+ std::variant< + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int, int, int, int, int> + v_many_types_no_value; + + v1 = 12; // v contains int + v_v1 = v1; + int i = std::get<int>(v1); + printf("%d\n", i); // break here + + v2 = 2.0; + double d = std::get<double>(v2); + printf("%f\n", d); + + v3 = 'A'; + char c = std::get<char>(v3); + printf("%d\n", c); + + // Checking v1 above and here to make sure we don't maintain the incorrect + // state when we change its value. + v1 = 2.0; + d = std::get<double>(v1); + printf("%f\n", d); // break here + + try { + v_no_value.emplace<0>(S()); + } catch (...) { + } + + printf("%zu\n", v_no_value.index()); + + try { + v_many_types_no_value.emplace<0>(S()); + } catch (...) { + } + + printf("%zu\n", v_many_types_no_value.index()); + + return 0; // break here +} From lldb-commits at lists.llvm.org Mon Oct 2 17:46:48 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Mon, 02 Oct 2023 17:46:48 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651b6478.620a0220.5f771.06fc@mx.google.com> https://github.com/jeffreytan81 closed https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Mon Oct 2 18:50:44 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Mon, 02 Oct 2023 18:50:44 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb/test] Update TestProgressReporting.py (NFC) (PR #68053) In-Reply-To: Message-ID: <651b7374.170a0220.c60c4.0883@mx.google.com> https://github.com/medismailben closed https://github.com/llvm/llvm-project/pull/68053 From lldb-commits at lists.llvm.org Mon Oct 2 23:18:40 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Mon, 02 Oct 2023 23:18:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651bb240.630a0220.aac22.1584@mx.google.com> antmox wrote: Hello, It looks like this broke 2 bots: lldb-aarch64-ubuntu : https://lab.llvm.org/buildbot/#/builders/96/builds/46436 lldb-arm-ubuntu : https://lab.llvm.org/buildbot/#/builders/17/builds/44011 Could you please take a look?
https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Tue Oct 3 01:53:17 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Tue, 03 Oct 2023 01:53:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651bd67d.170a0220.ed34a.1856@mx.google.com> https://github.com/kiranchandramohan approved this pull request. LGTM. Will the wrapper function stay or be removed? https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Tue Oct 3 01:53:18 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Tue, 03 Oct 2023 01:53:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651bd67e.170a0220.417db.1bbc@mx.google.com> https://github.com/kiranchandramohan edited https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Tue Oct 3 01:53:23 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Tue, 03 Oct 2023 01:53:23 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651bd683.a70a0220.17158.1ad2@mx.google.com> ================ @@ -0,0 +1,136 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @foo() + +// CHECK-LABEL: @omp_teams_simple +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[wrapperfn:.+]]) ---------------- kiranchandramohan wrote: Could you convert the captured variables (e.g. wrapperfn) to caps? This is to distinguish them easily from code. https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Tue Oct 3 02:14:41 2023 From: lldb-commits at lists.llvm.org (Stanislav Mekhanoshin via lldb-commits) Date: Tue, 03 Oct 2023 02:14:41 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651bdb81.170a0220.6ce6.1e44@mx.google.com> https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/67878 >From fa8ef5779a36039c345063b58c302f8b3951ee65 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Sat, 30 Sep 2023 01:31:11 -0700 Subject: [PATCH 1/2] [AMDGPU] Add another SIFoldOperands instance after shrink There is no fold operands pass after the shrink pass, and at the same time there are only limited attempts to do shrinking inside folding. We seem to need to run shrinking before folding, hence this patch. I can see some clear benefits in the tests we have. I also need this for a future patch. We could extend our efforts to do shrinking inside folding, but in the end that would just recreate the shrinking pass there. As an alternative I have tried moving the previous instance of folding past the shrink, but the result is not as good as here and there were a few regressions. We may see some slight compile-time regressions, hence the extra pass is disabled at -O1.
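Many of the test updates below show the effect directly: with the extra SIFoldOperands run, a wide immediate no longer has to stay materialized in an SGPR and read from there, but is folded into the VALU instruction as a literal operand. A condensed before/after sketch of the recurring pattern, taken from the GFX6 saddsat changes in this patch:

```
; before: constant materialized into an SGPR and read from there
s_brev_b32    s4, -2                  ; s4 = 0x7fffffff (bit-reverse of -2)
v_sub_i32_e32 v4, vcc, s4, v4

; after: the s_brev_b32 disappears and the literal is folded in
v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
```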
--- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 + .../CodeGen/AMDGPU/GlobalISel/add.v2i16.ll | 5 +- .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 161 +- .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll | 8 +- .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll | 16 +- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 4 +- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 161 +- .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll | 14 +- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 6 +- .../CodeGen/AMDGPU/ds-combine-large-stride.ll | 46 +- .../CodeGen/AMDGPU/integer-mad-patterns.ll | 3 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 2 + .../AMDGPU/promote-constOffset-to-imm.ll | 106 +- llvm/test/CodeGen/AMDGPU/sdiv64.ll | 9 +- .../CodeGen/AMDGPU/spill-scavenge-offset.ll | 1298 ++++++++--------- llvm/test/CodeGen/AMDGPU/srem64.ll | 9 +- llvm/test/CodeGen/AMDGPU/udiv64.ll | 3 +- llvm/test/CodeGen/AMDGPU/urem64.ll | 3 +- 18 files changed, 885 insertions(+), 971 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index bcbc03eb2559c4f..a674c52667c684b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1199,6 +1199,8 @@ void GCNPassConfig::addMachineSSAOptimization() { } addPass(&DeadMachineInstructionElimID); addPass(createSIShrinkInstructionsPass()); + if (TM->getOptLevel() > CodeGenOptLevel::Less) + addPass(&SIFoldOperandsID); } bool GCNPassConfig::addILPOpts() { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll index 26d1fbb09210c64..e9f30e8503b310e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll @@ -165,9 +165,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) { ; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_movk_i32 s4, 0xffc0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s4, v0 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v0 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, 0xffffffc0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index cded5c94edf8cc3..c78d4533f4ddd3f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -231,14 +231,12 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v1, v5, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v4 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 @@ -246,8 +244,8 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { ; GFX6-NEXT: v_min_i32_e32 v4, 0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v3, 
vcc, s4, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -512,15 +510,15 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, 0, v0 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_max_i32_e32 v1, v10, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v8 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX6-NEXT: v_min_i32_e32 v8, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v1 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s5, v8 @@ -1265,19 +1263,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v2i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v5, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_min_i32_e32 v4, 0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_max_i32_e32 v3, v4, v3 ; GFX6-NEXT: v_min_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -1286,19 +1282,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 ; GFX8-NEXT: v_min_i32_e32 v5, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s5, v5 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x80000000, v5 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_max_i32_e32 v2, v5, v2 ; GFX8-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX8-NEXT: v_min_i32_e32 v4, 0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; GFX8-NEXT: v_max_i32_e32 v2, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s5, v4 -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x80000000, v4 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 0x7fffffff, v2 ; GFX8-NEXT: v_max_i32_e32 v3, v4, v3 ; GFX8-NEXT: v_min_i32_e32 v2, v3, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 @@ -1383,26 +1377,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v3i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v7, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v6, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v7, vcc, s5, v7 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s4, v6 +; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0x80000000, v7 +; GFX6-NEXT: v_sub_i32_e32 
v6, vcc, 0x7fffffff, v6 ; GFX6-NEXT: v_max_i32_e32 v3, v7, v3 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s5, v6 +; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 ; GFX6-NEXT: v_max_i32_e32 v4, v6, v4 ; GFX6-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX6-NEXT: v_min_i32_e32 v4, 0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v2 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX6-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 @@ -1411,26 +1404,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v3i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 ; GFX8-NEXT: v_min_i32_e32 v7, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v6, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s5, v7 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s4, v6 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 0x80000000, v7 +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x7fffffff, v6 ; GFX8-NEXT: v_max_i32_e32 v3, v7, v3 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v1 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s5, v6 +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 ; GFX8-NEXT: v_max_i32_e32 v4, v6, v4 ; GFX8-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX8-NEXT: v_min_i32_e32 v4, 0, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_max_i32_e32 v3, 0, v2 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s5, v4 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x80000000, v4 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 0x7fffffff, v3 ; GFX8-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX8-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 @@ -1536,26 +1528,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v4i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v9, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v9, vcc, s5, v9 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0x80000000, v9 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_max_i32_e32 v4, v9, v4 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX6-NEXT: v_min_i32_e32 v8, 0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s5, v8 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x80000000, v8 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v5, v8, v5 ; GFX6-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v2 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, 
vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX6-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v3 @@ -1571,26 +1561,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 ; GFX8-NEXT: v_min_i32_e32 v9, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v8, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v9, vcc, s5, v9 -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s4, v8 +; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 0x80000000, v9 +; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 0x7fffffff, v8 ; GFX8-NEXT: v_max_i32_e32 v4, v9, v4 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX8-NEXT: v_min_i32_e32 v8, 0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v4 ; GFX8-NEXT: v_max_i32_e32 v4, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s5, v8 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 0x80000000, v8 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_max_i32_e32 v5, v8, v5 ; GFX8-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX8-NEXT: v_min_i32_e32 v5, 0, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4 ; GFX8-NEXT: v_max_i32_e32 v4, 0, v2 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s5, v5 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x80000000, v5 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX8-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX8-NEXT: v_min_i32_e32 v5, 0, v3 @@ -1724,34 +1712,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v5i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_min_i32_e32 v12, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 +; GFX6-NEXT: v_min_i32_e32 v11, 0, v0 ; GFX6-NEXT: v_max_i32_e32 v10, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v12, vcc, s5, v12 -; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s4, v10 -; GFX6-NEXT: v_max_i32_e32 v5, v12, v5 +; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0x80000000, v11 +; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 0x7fffffff, v10 +; GFX6-NEXT: v_max_i32_e32 v5, v11, v5 ; GFX6-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX6-NEXT: v_min_i32_e32 v10, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10 +; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 0x80000000, v10 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s4, v5 ; GFX6-NEXT: v_max_i32_e32 v6, v10, v6 ; GFX6-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v2 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s5, v6 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s4, v5 +; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v7 ; GFX6-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v3 -; GFX6-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v3 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v11, v5 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v8 ; GFX6-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v4 @@ -1767,34 +1753,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v5i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_min_i32_e32 v12, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 +; GFX8-NEXT: v_min_i32_e32 v11, 0, v0 ; GFX8-NEXT: v_max_i32_e32 v10, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v12, vcc, s5, v12 -; GFX8-NEXT: v_sub_u32_e32 v10, vcc, s4, v10 -; GFX8-NEXT: v_max_i32_e32 v5, v12, v5 +; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 0x80000000, v11 +; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 0x7fffffff, v10 +; GFX8-NEXT: v_max_i32_e32 v5, v11, v5 ; GFX8-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX8-NEXT: v_min_i32_e32 v10, 0, v1 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_max_i32_e32 v5, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v10, vcc, s5, v10 +; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 0x80000000, v10 ; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s4, v5 ; GFX8-NEXT: v_max_i32_e32 v6, v10, v6 ; GFX8-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5 ; GFX8-NEXT: v_max_i32_e32 v5, 0, v2 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s5, v6 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s4, v5 +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_max_i32_e32 v6, v6, v7 ; GFX8-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v3 -; GFX8-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 ; GFX8-NEXT: v_max_i32_e32 v5, 0, v3 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v11, v5 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_max_i32_e32 v6, v6, v8 ; GFX8-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v4 @@ -2766,13 +2750,11 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v5, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 @@ -2780,8 +2762,8 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -2978,13 +2960,11 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-LABEL: saddsat_v2i16_vs: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s3, 1 ; GFX6-NEXT: v_min_i32_e32 v3, 0, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_brev_b32 s2, -2 ; GFX6-NEXT: v_max_i32_e32 v2, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s3, v3 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x80000000, v3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_max_i32_e32 v3, s0, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 
v1, 16, v1 ; GFX6-NEXT: v_min_i32_e32 v2, v3, v2 @@ -2992,8 +2972,8 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: s_lshl_b32 s0, s1, 16 ; GFX6-NEXT: v_max_i32_e32 v2, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s3, v3 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x80000000, v3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_max_i32_e32 v3, s0, v3 ; GFX6-NEXT: v_min_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -3059,14 +3039,14 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v10, 0, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, 0, v0 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_max_i32_e32 v4, v10, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX6-NEXT: v_min_i32_e32 v8, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v1 @@ -4847,8 +4827,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4871,8 +4850,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x80000000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4895,8 +4873,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll index ab000d91a3ef23d..7f7788de6a7e1fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll @@ -295,7 +295,7 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x1000, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 @@ -345,7 +345,7 @@ define <2 
x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) { ; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v0 ; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7] -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s8, v1 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7] @@ -437,7 +437,7 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 @@ -486,7 +486,7 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) { ; GISEL-NEXT: v_subrev_i32_e32 v6, vcc, s8, v0 ; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v1 +; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll index 88ace1c51f5b023..6aff6200acff9f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll @@ -268,10 +268,10 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 @@ -312,13 +312,13 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) { ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 @@ -399,10 +399,10 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; 
CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 @@ -443,13 +443,13 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) { ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index d0c55c69f508775..a89f01d62afa71f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1078,7 +1078,7 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v0 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 @@ -1699,7 +1699,7 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 65455d754be4f53..345982c1c693317 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -231,23 +231,21 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v1, v4, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v5 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ 
-506,20 +504,20 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_min_i32_e32 v10, -1, v0 ; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 ; GFX6-NEXT: v_max_i32_e32 v1, v8, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v10 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 @@ -1265,19 +1263,17 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v2i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v5 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s4, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ -1286,19 +1282,17 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v4, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s5, v5 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5 ; GFX8-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX8-NEXT: v_min_i32_e32 v2, v2, v5 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 ; GFX8-NEXT: v_max_i32_e32 v2, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2 ; GFX8-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s5, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4 ; GFX8-NEXT: v_max_i32_e32 v2, v2, v3 ; GFX8-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v2 @@ -1383,26 +1377,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v3i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v6, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s4, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x7fffffff, v6 ; GFX6-NEXT: v_min_i32_e32 v7, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v7, 
vcc, s5, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 0x80000000, v7 ; GFX6-NEXT: v_max_i32_e32 v3, v6, v3 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v7 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 ; GFX6-NEXT: v_min_i32_e32 v6, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v4 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v5 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v4 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 @@ -1411,26 +1404,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v3i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v6, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s4, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x7fffffff, v6 ; GFX8-NEXT: v_min_i32_e32 v7, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s5, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 0x80000000, v7 ; GFX8-NEXT: v_max_i32_e32 v3, v6, v3 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v7 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_max_i32_e32 v3, -1, v1 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 ; GFX8-NEXT: v_min_i32_e32 v6, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s5, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_max_i32_e32 v3, v3, v4 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_max_i32_e32 v3, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 0x7fffffff, v3 ; GFX8-NEXT: v_min_i32_e32 v4, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s5, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4 ; GFX8-NEXT: v_max_i32_e32 v3, v3, v5 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v4 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 @@ -1536,26 +1528,24 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v4i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_min_i32_e32 v9, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v9, vcc, s5, v9 +; GFX6-NEXT: v_subrev_i32_e32 v9, vcc, 0x80000000, v9 ; GFX6-NEXT: v_max_i32_e32 v4, v8, v4 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v9 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v8, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s5, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x80000000, v8 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: 
v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v6 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v5 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -1571,26 +1561,24 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v8, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s4, v8 +; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x7fffffff, v8 ; GFX8-NEXT: v_min_i32_e32 v9, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, s5, v9 +; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, 0x80000000, v9 ; GFX8-NEXT: v_max_i32_e32 v4, v8, v4 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v9 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v4 ; GFX8-NEXT: v_max_i32_e32 v4, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_min_i32_e32 v8, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s5, v8 +; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x80000000, v8 ; GFX8-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v4 ; GFX8-NEXT: v_max_i32_e32 v4, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_min_i32_e32 v5, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s5, v5 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5 ; GFX8-NEXT: v_max_i32_e32 v4, v4, v6 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v5 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v4 @@ -1724,32 +1712,30 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v5i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v10, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s4, v10 -; GFX6-NEXT: v_min_i32_e32 v12, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, s5, v12 +; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 0x7fffffff, v10 +; GFX6-NEXT: v_min_i32_e32 v11, -1, v0 +; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 0x80000000, v11 ; GFX6-NEXT: v_max_i32_e32 v5, v10, v5 -; GFX6-NEXT: v_min_i32_e32 v5, v5, v12 +; GFX6-NEXT: v_min_i32_e32 v5, v5, v11 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 ; GFX6-NEXT: v_min_i32_e32 v10, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 +; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 0x80000000, v10 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX6-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_min_i32_e32 v6, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v7 ; GFX6-NEXT: v_min_i32_e32 v5, v5, v6 -; GFX6-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v3 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_min_i32_e32 v6, -1, v3 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6 ; 
GFX6-NEXT: v_max_i32_e32 v5, v5, v8 @@ -1767,32 +1753,30 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v5i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v10, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s4, v10 -; GFX8-NEXT: v_min_i32_e32 v12, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v12, vcc, s5, v12 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 0x7fffffff, v10 +; GFX8-NEXT: v_min_i32_e32 v11, -1, v0 +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 0x80000000, v11 ; GFX8-NEXT: v_max_i32_e32 v5, v10, v5 -; GFX8-NEXT: v_min_i32_e32 v5, v5, v12 +; GFX8-NEXT: v_min_i32_e32 v5, v5, v11 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_max_i32_e32 v5, -1, v1 ; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 ; GFX8-NEXT: v_min_i32_e32 v10, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s5, v10 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 0x80000000, v10 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX8-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v5 ; GFX8-NEXT: v_max_i32_e32 v5, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_min_i32_e32 v6, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s5, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v7 ; GFX8-NEXT: v_min_i32_e32 v5, v5, v6 -; GFX8-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v5 ; GFX8-NEXT: v_max_i32_e32 v5, -1, v3 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_min_i32_e32 v6, -1, v3 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v8 @@ -2766,22 +2750,20 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ -2978,22 +2960,20 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-LABEL: ssubsat_v2i16_vs: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s2, -2 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_brev_b32 s3, 1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 -; GFX6-NEXT: 
v_subrev_i32_e32 v3, vcc, s3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 ; GFX6-NEXT: v_max_i32_e32 v2, s0, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v1 ; GFX6-NEXT: s_lshl_b32 s0, s1, 16 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_min_i32_e32 v3, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 ; GFX6-NEXT: v_max_i32_e32 v2, s0, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ -3056,16 +3036,16 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_min_i32_e32 v10, -1, v0 ; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 ; GFX6-NEXT: v_max_i32_e32 v4, v8, v4 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v10 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 @@ -4847,8 +4827,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[6:7], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4871,8 +4850,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[6:7], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x80000000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4895,8 +4873,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[6:7], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll index 48f05a33f03649c..3af5ac98658ddac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -222,10 +222,10 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) { ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 
0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -253,13 +253,13 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) { ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc @@ -284,13 +284,13 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) { ; CGP-NEXT: v_mul_lo_u32 v2, v2, s4 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 -; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 +; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 097f6642cbc669b..f5b27906ed67ed8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1068,7 +1068,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[6:7] ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] @@ -1295,7 +1295,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 +; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, 0x12d8fb, v2 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[8:9] ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] @@ -1530,7 +1530,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 0x12d8fb, v2 ; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7 ; CGP-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[8:9] ; CGP-NEXT: v_subbrev_u32_e64 v1, 
s[4:5], 0, v1, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll index aa1d44c31606b8f..726203c4156708b 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -5,9 +5,9 @@ ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x200, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -50,8 +50,8 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x400, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x800, [[BASE]] @@ -94,9 +94,9 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -173,9 +173,9 @@ bb: ; GCN-LABEL: ds_read32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e64 [[B1:v[0-9]+]], vcc, [[ARG]], 8 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 @@ -209,7 +209,7 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50 @@ -247,9 +247,9 @@ bb: ; GCN-LABEL: ds_read64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e64 [[B1:v[0-9]+]], vcc, [[ARG]], 8 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 @@ -283,9 +283,9 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 
[[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x200, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -319,9 +319,9 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -380,9 +380,9 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[BASE:v[0-9]+]], vcc, 4, [[BASE]] +; VI-DAG: v_add_u32_e64 [[BASE:v[0-9]+]], vcc, [[ARG]], 4 ; GFX9-DAG: v_add_u32_e32 [[BASE:v[0-9]+]], 4, [[BASE]] ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 @@ -409,7 +409,7 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 @@ -438,9 +438,9 @@ bb: ; GCN-LABEL: ds_write64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[BASE]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e64 [[BASE:v[0-9]+]], vcc, [[ARG]], 8 ; GFX9-DAG: v_add_u32_e32 [[BASE]], 8, [[BASE]] ; GCN-DAG: ds_write2st64_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll index 61017e809c86365..70a7f67f5b8d0d6 100644 --- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll @@ -3273,9 +3273,8 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v6 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v0, v2 ; GFX67-SDAG-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX67-SDAG-NEXT: s_movk_i32 s4, 0x100 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, s4, v3 +; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 0x100, v3 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v1 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index b939c8d2e339de4..25047121f31ff61 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -920,6 +920,7 @@ ; GCN-O2-NEXT: SI Fold Operands ; GCN-O2-NEXT: Remove dead machine instructions ; GCN-O2-NEXT: SI Shrink Instructions +; GCN-O2-NEXT: SI Fold Operands ; 
GCN-O2-NEXT: Register Usage Information Propagation ; GCN-O2-NEXT: Detect Dead Lanes ; GCN-O2-NEXT: Remove dead machine instructions @@ -1238,6 +1239,7 @@ ; GCN-O3-NEXT: SI Fold Operands ; GCN-O3-NEXT: Remove dead machine instructions ; GCN-O3-NEXT: SI Shrink Instructions +; GCN-O3-NEXT: SI Fold Operands ; GCN-O3-NEXT: Register Usage Information Propagation ; GCN-O3-NEXT: Detect Dead Lanes ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index a462c19ce645d4a..c2b10c160bf586a 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -54,13 +54,11 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_movk_i32 s0, 0x2000 ; GFX8-NEXT: v_add_u32_e32 v13, vcc, s0, v0 ; GFX8-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc -; GFX8-NEXT: s_movk_i32 s0, 0x2800 -; GFX8-NEXT: v_add_u32_e32 v15, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v15, vcc, 0x2800, v0 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx2 v[13:14], v[13:14] ; GFX8-NEXT: flat_load_dwordx2 v[15:16], v[15:16] -; GFX8-NEXT: s_movk_i32 s0, 0x3000 -; GFX8-NEXT: v_add_u32_e32 v17, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0x3000, v0 ; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx2 v[17:18], v[17:18] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3800, v0 @@ -132,8 +130,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX900-NEXT: global_load_dwordx2 v[12:13], v[10:11], off offset:2048 ; GFX900-NEXT: global_load_dwordx2 v[14:15], v[6:7], off ; GFX900-NEXT: global_load_dwordx2 v[16:17], v[6:7], off offset:2048 -; GFX900-NEXT: s_movk_i32 s0, 0x3000 -; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 ; GFX900-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off ; GFX900-NEXT: global_load_dwordx2 v[10:11], v[0:1], off offset:2048 @@ -276,8 +273,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: global_load_dwordx2 v[12:13], v[10:11], off offset:2048 ; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[6:7], off ; GFX90A-NEXT: global_load_dwordx2 v[16:17], v[6:7], off offset:2048 -; GFX90A-NEXT: s_movk_i32 s0, 0x3000 -; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX90A-NEXT: global_load_dwordx2 v[6:7], v[0:1], off ; GFX90A-NEXT: global_load_dwordx2 v[10:11], v[0:1], off offset:2048 @@ -573,21 +569,17 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX900-NEXT: v_mov_b32_e32 v3, s35 ; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v3, vcc -; GFX900-NEXT: s_movk_i32 s0, 0x5000 -; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s0, v1 +; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, 0x5000, v1 ; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX900-NEXT: s_movk_i32 s2, 0x7f ; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: s_movk_i32 s0, 0xd000 -; GFX900-NEXT: s_movk_i32 s1, 0xe000 -; GFX900-NEXT: s_movk_i32 s3, 0xf000 +; GFX900-NEXT: s_movk_i32 s0, 0x7f ; GFX900-NEXT: .LBB1_1: ; %for.cond.preheader ; GFX900-NEXT: ; =>This Loop Header: Depth=1 ; GFX900-NEXT: ; Child Loop BB1_2 Depth 2 ; 
GFX900-NEXT: v_mov_b32_e32 v6, v2 ; GFX900-NEXT: v_mov_b32_e32 v5, v1 -; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: s_mov_b32 s1, 0 ; GFX900-NEXT: .LBB1_2: ; %for.body ; GFX900-NEXT: ; Parent Loop BB1_1 Depth=1 ; GFX900-NEXT: ; => This Inner Loop Header: Depth=2 @@ -599,23 +591,23 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX900-NEXT: global_load_dwordx2 v[7:8], v[7:8], off ; GFX900-NEXT: v_addc_co_u32_e32 v14, vcc, -1, v6, vcc ; GFX900-NEXT: global_load_dwordx2 v[17:18], v[13:14], off offset:-2048 -; GFX900-NEXT: v_add_co_u32_e32 v15, vcc, s0, v5 +; GFX900-NEXT: v_add_co_u32_e32 v15, vcc, 0xffffd000, v5 ; GFX900-NEXT: v_addc_co_u32_e32 v16, vcc, -1, v6, vcc ; GFX900-NEXT: global_load_dwordx2 v[15:16], v[15:16], off offset:-2048 -; GFX900-NEXT: v_add_co_u32_e32 v19, vcc, s1, v5 +; GFX900-NEXT: v_add_co_u32_e32 v19, vcc, 0xffffe000, v5 ; GFX900-NEXT: global_load_dwordx2 v[13:14], v[13:14], off ; GFX900-NEXT: v_addc_co_u32_e32 v20, vcc, -1, v6, vcc ; GFX900-NEXT: global_load_dwordx2 v[23:24], v[19:20], off offset:-4096 ; GFX900-NEXT: global_load_dwordx2 v[25:26], v[19:20], off offset:-2048 ; GFX900-NEXT: global_load_dwordx2 v[27:28], v[19:20], off -; GFX900-NEXT: v_add_co_u32_e32 v21, vcc, s3, v5 +; GFX900-NEXT: v_add_co_u32_e32 v21, vcc, 0xfffff000, v5 ; GFX900-NEXT: v_addc_co_u32_e32 v22, vcc, -1, v6, vcc ; GFX900-NEXT: global_load_dwordx2 v[19:20], v[21:22], off offset:-2048 ; GFX900-NEXT: global_load_dwordx2 v[29:30], v[5:6], off ; GFX900-NEXT: v_add_co_u32_e32 v5, vcc, 0x10000, v5 ; GFX900-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX900-NEXT: s_addk_i32 s4, 0x2000 -; GFX900-NEXT: s_cmp_gt_u32 s4, 0x3fffff +; GFX900-NEXT: s_addk_i32 s1, 0x2000 +; GFX900-NEXT: s_cmp_gt_u32 s1, 0x3fffff ; GFX900-NEXT: s_waitcnt vmcnt(8) ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, v7, v3 ; GFX900-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v4, vcc @@ -649,11 +641,11 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX900-NEXT: s_cbranch_scc0 .LBB1_2 ; GFX900-NEXT: ; %bb.3: ; %while.cond.loopexit ; GFX900-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GFX900-NEXT: s_add_i32 s4, s2, -1 -; GFX900-NEXT: s_cmp_eq_u32 s2, 0 +; GFX900-NEXT: s_add_i32 s1, s0, -1 +; GFX900-NEXT: s_cmp_eq_u32 s0, 0 ; GFX900-NEXT: s_cbranch_scc1 .LBB1_5 ; GFX900-NEXT: ; %bb.4: ; in Loop: Header=BB1_1 Depth=1 -; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s0, s1 ; GFX900-NEXT: s_branch .LBB1_1 ; GFX900-NEXT: .LBB1_5: ; %while.end ; GFX900-NEXT: v_mov_b32_e32 v1, s35 @@ -805,19 +797,15 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: v_mov_b32_e32 v2, s35 ; GFX90A-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v2, vcc -; GFX90A-NEXT: s_movk_i32 s0, 0x5000 -; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v1 +; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x5000, v1 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX90A-NEXT: s_movk_i32 s2, 0x7f +; GFX90A-NEXT: s_movk_i32 s0, 0x7f ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0 -; GFX90A-NEXT: s_movk_i32 s0, 0xd000 -; GFX90A-NEXT: s_movk_i32 s1, 0xe000 -; GFX90A-NEXT: s_movk_i32 s3, 0xf000 ; GFX90A-NEXT: .LBB1_1: ; %for.cond.preheader ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB1_2 Depth 2 ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[0,1] -; GFX90A-NEXT: s_mov_b32 s4, 0 +; GFX90A-NEXT: s_mov_b32 s1, 0 ; GFX90A-NEXT: .LBB1_2: ; %for.body ; GFX90A-NEXT: ; Parent Loop BB1_1 Depth=1 ; GFX90A-NEXT: ; => This 
Inner Loop Header: Depth=2 @@ -829,23 +817,23 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: global_load_dwordx2 v[12:13], v[12:13], off ; GFX90A-NEXT: v_addc_co_u32_e32 v15, vcc, -1, v7, vcc ; GFX90A-NEXT: global_load_dwordx2 v[18:19], v[14:15], off offset:-2048 -; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, s0, v6 +; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, 0xffffd000, v6 ; GFX90A-NEXT: v_addc_co_u32_e32 v17, vcc, -1, v7, vcc ; GFX90A-NEXT: global_load_dwordx2 v[16:17], v[16:17], off offset:-2048 -; GFX90A-NEXT: v_add_co_u32_e32 v20, vcc, s1, v6 +; GFX90A-NEXT: v_add_co_u32_e32 v20, vcc, 0xffffe000, v6 ; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[14:15], off ; GFX90A-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v7, vcc ; GFX90A-NEXT: global_load_dwordx2 v[24:25], v[20:21], off offset:-4096 ; GFX90A-NEXT: global_load_dwordx2 v[26:27], v[20:21], off offset:-2048 ; GFX90A-NEXT: global_load_dwordx2 v[28:29], v[20:21], off -; GFX90A-NEXT: v_add_co_u32_e32 v22, vcc, s3, v6 +; GFX90A-NEXT: v_add_co_u32_e32 v22, vcc, 0xfffff000, v6 ; GFX90A-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v7, vcc ; GFX90A-NEXT: global_load_dwordx2 v[20:21], v[22:23], off offset:-2048 ; GFX90A-NEXT: global_load_dwordx2 v[30:31], v[6:7], off ; GFX90A-NEXT: v_add_co_u32_e32 v6, vcc, 0x10000, v6 ; GFX90A-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc -; GFX90A-NEXT: s_addk_i32 s4, 0x2000 -; GFX90A-NEXT: s_cmp_gt_u32 s4, 0x3fffff +; GFX90A-NEXT: s_addk_i32 s1, 0x2000 +; GFX90A-NEXT: s_cmp_gt_u32 s1, 0x3fffff ; GFX90A-NEXT: s_waitcnt vmcnt(8) ; GFX90A-NEXT: v_add_co_u32_e32 v1, vcc, v12, v4 ; GFX90A-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc @@ -879,11 +867,11 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: s_cbranch_scc0 .LBB1_2 ; GFX90A-NEXT: ; %bb.3: ; %while.cond.loopexit ; GFX90A-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GFX90A-NEXT: s_add_i32 s4, s2, -1 -; GFX90A-NEXT: s_cmp_eq_u32 s2, 0 +; GFX90A-NEXT: s_add_i32 s1, s0, -1 +; GFX90A-NEXT: s_cmp_eq_u32 s0, 0 ; GFX90A-NEXT: s_cbranch_scc1 .LBB1_5 ; GFX90A-NEXT: ; %bb.4: ; in Loop: Header=BB1_1 Depth=1 -; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s0, s1 ; GFX90A-NEXT: s_branch .LBB1_1 ; GFX90A-NEXT: .LBB1_5: ; %while.end ; GFX90A-NEXT: v_mov_b32_e32 v1, s35 @@ -1163,10 +1151,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_movk_i32 s0, 0x1800 ; GFX8-NEXT: v_add_u32_e32 v15, vcc, s0, v0 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc -; GFX8-NEXT: s_movk_i32 s0, 0x1c00 -; GFX8-NEXT: v_add_u32_e32 v17, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0x1c00, v0 ; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc -; GFX8-NEXT: s_movk_i32 s0, 0x2000 ; GFX8-NEXT: flat_load_dword v2, v[0:1] ; GFX8-NEXT: flat_load_dword v19, v[5:6] ; GFX8-NEXT: flat_load_dword v7, v[7:8] @@ -1175,7 +1161,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX8-NEXT: flat_load_dword v10, v[13:14] ; GFX8-NEXT: flat_load_dword v11, v[15:16] ; GFX8-NEXT: flat_load_dword v12, v[17:18] -; GFX8-NEXT: v_add_u32_e32 v5, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x2000, v0 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x2400, v0 ; GFX8-NEXT: flat_load_dword v5, v[5:6] @@ -1230,10 +1216,9 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, s34, v4 ; GFX900-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v0, vcc ; GFX900-NEXT: v_lshlrev_b64 v[0:1], 2, v[1:2] -; GFX900-NEXT: 
s_movk_i32 s0, 0x1000 ; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX900-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc -; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, s0, v0 +; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v0 ; GFX900-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc ; GFX900-NEXT: global_load_dword v5, v[0:1], off ; GFX900-NEXT: global_load_dword v6, v[0:1], off offset:1024 @@ -1357,8 +1342,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3] ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, v5, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v1, vcc -; GFX90A-NEXT: s_movk_i32 s0, 0x1000 -; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v0 +; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc ; GFX90A-NEXT: global_load_dword v5, v[0:1], off ; GFX90A-NEXT: global_load_dword v6, v[0:1], off offset:1024 @@ -1526,10 +1510,9 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_movk_i32 s0, 0xf000 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, s0, v0 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc -; GFX8-NEXT: s_movk_i32 s0, 0xf800 ; GFX8-NEXT: flat_load_dwordx2 v[7:8], v[0:1] ; GFX8-NEXT: flat_load_dwordx2 v[5:6], v[5:6] -; GFX8-NEXT: v_add_u32_e32 v9, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xfffff800, v0 ; GFX8-NEXT: v_addc_u32_e32 v10, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx2 v[9:10], v[9:10] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 @@ -1804,11 +1787,9 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc -; GFX8-NEXT: s_mov_b32 s0, 0x7ffff800 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7ffff800, v0 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc -; GFX8-NEXT: s_mov_b32 s0, 0x7ffffc00 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x7ffffc00, v0 ; GFX8-NEXT: v_addc_u32_e32 v8, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dword v2, v[0:1] ; GFX8-NEXT: flat_load_dword v5, v[5:6] @@ -2348,13 +2329,11 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_movk_i32 s0, 0x2000 ; GFX8-NEXT: v_add_u32_e32 v13, vcc, s0, v0 ; GFX8-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc -; GFX8-NEXT: s_movk_i32 s0, 0x1800 -; GFX8-NEXT: v_add_u32_e32 v15, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v15, vcc, 0x1800, v0 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx2 v[13:14], v[13:14] ; GFX8-NEXT: flat_load_dwordx2 v[15:16], v[15:16] -; GFX8-NEXT: s_movk_i32 s0, 0x1000 -; GFX8-NEXT: v_add_u32_e32 v17, vcc, s0, v0 +; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0x1000, v0 ; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx2 v[17:18], v[17:18] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x800, v0 @@ -2424,8 +2403,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX900-NEXT: v_add_co_u32_e32 v4, vcc, s0, v0 ; GFX900-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc ; GFX900-NEXT: global_load_dwordx2 v[10:11], v[4:5], off offset:2048 -; GFX900-NEXT: s_movk_i32 s0, 0x1000 -; GFX900-NEXT: v_add_co_u32_e32 v12, vcc, s0, v0 +; GFX900-NEXT: v_add_co_u32_e32 v12, vcc, 0x1000, v0 ; GFX900-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc ; GFX900-NEXT: global_load_dwordx2 v[14:15], v[12:13], off ; GFX900-NEXT: global_load_dwordx2 v[16:17], v[4:5], off @@ -2571,8 +2549,7 @@ define 
amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, s0, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc ; GFX90A-NEXT: global_load_dwordx2 v[10:11], v[4:5], off offset:2048 -; GFX90A-NEXT: s_movk_i32 s0, 0x1000 -; GFX90A-NEXT: v_add_co_u32_e32 v12, vcc, s0, v0 +; GFX90A-NEXT: v_add_co_u32_e32 v12, vcc, 0x1000, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc ; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[12:13], off ; GFX90A-NEXT: global_load_dwordx2 v[16:17], v[4:5], off @@ -2743,8 +2720,7 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v4, v1, vcc -; GFX8-NEXT: s_movk_i32 s0, 0x800 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x800, v2 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, -1, v6, vcc ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0, v2 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, -1, v6, vcc @@ -2784,10 +2760,9 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, s34, v8 ; GFX900-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v0, vcc ; GFX900-NEXT: v_lshlrev_b64 v[0:1], 3, v[1:2] -; GFX900-NEXT: s_movk_i32 s0, 0x1000 ; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, v3, v0 ; GFX900-NEXT: v_addc_co_u32_e32 v3, vcc, v4, v1, vcc -; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 +; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2 ; GFX900-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v3, vcc ; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2 ; GFX900-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc @@ -2871,8 +2846,7 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 3, v[2:3] ; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, v4, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v1, vcc -; GFX90A-NEXT: s_movk_i32 s0, 0x1000 -; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v3, vcc ; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index 705a2af73959065..7361d9d9f795e39 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -1394,8 +1394,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1 ; GCN-IR-NEXT: v_min_u32_e32 v8, v4, v5 -; GCN-IR-NEXT: s_movk_i32 s6, 0xffc5 -; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, s6, v8 +; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, 0xffffffc5, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v6, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[5:6] @@ -1587,8 +1586,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1 ; GCN-IR-NEXT: v_min_u32_e32 v8, v4, v5 -; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0 -; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, s6, v8 +; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, 0xffffffd0, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v6, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[5:6] @@ -1722,13 +1720,12 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: 
v_mov_b32_e32 v12, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff ; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[9:10], v[9:10], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v0, 31, v4 ; GCN-IR-NEXT: v_or_b32_e32 v0, v9, v0 -; GCN-IR-NEXT: v_sub_i32_e32 v5, vcc, s12, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v5, vcc, 0x7fff, v0 ; GCN-IR-NEXT: v_subb_u32_e32 v5, vcc, 0, v10, vcc ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v7 ; GCN-IR-NEXT: v_lshl_b64 v[3:4], v[3:4], 1 diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index 08db1e7fee259d6..dd4eb0ae2a09ed6 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -5000,22 +5000,14 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 ; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 ; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x80 +; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4 ; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, s2, v5 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x80, v2 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x84 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x204 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x284 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x384 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 @@ -5043,1268 +5035,1268 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v2 ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x74 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x100 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded 
Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xa4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xe4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x180 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v2 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x104 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x114 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x114 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x124 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x124 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off 
offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x134 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x134 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x144 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x144 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x154 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x154 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x164 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x164 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x174 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x200 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x200, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x174 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x184 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x194 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x194 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1c4 
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1c4
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
[... remaining regenerated GFX9-FLATSCR check lines trimmed; they repeat the same two rewrites for every subsequent 16-byte spill block: each s_movk_i32/scratch_store_dwordx4 pair switches its scratch-offset SGPR from s7-s11 to s4 (store offsets 0x1d4 through 0xc74), each block-base increment folds its immediate (0x280, 0x300, 0x380, 0x400) directly into v_add_co_u32_e32 instead of first materializing it in an SGPR, and the interleaved global_load_dwordx4 instructions walk v[0:1] offsets 3968-4080 and then v5, s[2:3] offsets 0 through 2160 ...]
GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 
16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xca4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xce4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 
0xd24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xda4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 
v[0:3], v5, s[2:3] offset:2496 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xde4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xea4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xeb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xec4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xed4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xee4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xef4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 ; GFX9-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960 -; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfa4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfe4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xff4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1004 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1014 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1024 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 
off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1034 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1044 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1054 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1054 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1064 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1074 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1074 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1084 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1084 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1094 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1094 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1104 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1104 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1114 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1114 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1124 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1124 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1134 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1134 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1144 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1144 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1154 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1154 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424 -; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1164 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1164 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1174 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1174 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1184 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1184 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1194 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1194 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: 
scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1204 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1204 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1214 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1214 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1224 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1224 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1234 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1234 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1244 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1244 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1254 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1254 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1264 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1264 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1274 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1274 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1284 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1284 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1294 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1294 ; GFX9-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1304 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1304 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1314 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1314 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1324 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1324 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, 
s[2:3] offset:3888 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1334 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1334 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1344 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1344 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1354 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1354 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1364 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1364 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1374 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1374 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1384 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1384 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1394 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1394 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080 ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 ; GFX9-FLATSCR-NEXT: ;;#ASMSTART @@ -7346,7 +7338,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3f4 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x400, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7380,7 +7372,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x380, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x364 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7414,7 +7406,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x300, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7448,7 +7440,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x280, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x264 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7482,7 +7474,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4 +; GFX9-FLATSCR-NEXT: 
v_add_co_u32_e32 v0, vcc, 0x200, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7516,7 +7508,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x164 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7550,7 +7542,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 613349f32e2d5ad..f8898ddf9290bcd 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -1583,8 +1583,7 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 ; GCN-IR-NEXT: v_min_u32_e32 v6, v2, v3 -; GCN-IR-NEXT: s_movk_i32 s6, 0xffc5 -; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, s6, v6 +; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc5, v6 ; GCN-IR-NEXT: v_addc_u32_e64 v4, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] @@ -1774,8 +1773,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 ; GCN-IR-NEXT: v_min_u32_e32 v6, v2, v3 -; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0 -; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, s6, v6 +; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, 0xffffffd0, v6 ; GCN-IR-NEXT: v_addc_u32_e64 v4, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] @@ -1914,13 +1912,12 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: v_mov_b32_e32 v13, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff ; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 ; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, s12, v10 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 0x7fff, v10 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 ; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc ; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v8 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index c5ab44e31c0320d..ea938d5a121a24c 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -1283,13 +1283,12 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: 
s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while
 ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT: v_lshl_b64 v[7:8], v[7:8], 1
 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3
 ; GCN-IR-NEXT: v_or_b32_e32 v6, v7, v4
-; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v6
+; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, 0x7fff, v6
 ; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v8, vcc
 ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 1, v0
 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 894c96acbbcd6b1..c1119e0a9cc3336 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -1307,13 +1307,12 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
 ; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT: .LBB9_3: ; %udiv-do-while
 ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3
 ; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4
-; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v8
+; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, 0x7fff, v8
 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
 ; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v9, vcc
 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6

>From d5fe529e16864f4305ff30362a0923d3843570eb Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Sat, 30 Sep 2023 01:31:11 -0700
Subject: [PATCH 2/2] [AMDGPU] Add another SIFoldOperands instance after shrink

There is no fold-operands pass past the shrink, and at the same time there
are only limited attempts to do shrinking right inside folding. We seem to
need to run shrinking before folding, hence this patch. I can see some clear
benefits in the tests we have. I also need this for a future patch.

We could extend our efforts to do shrinking inside folding, but in the end
that would just result in the recreation of the shrinking pass there. As an
alternative I have tried to move the previous instance of the folding past
the shrink, but the result was not as good as here and there were a few
regressions. We may see some slight compile-time regressions, hence the
extra run is disabled at -O1.
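In effect the patch appends a second SIFoldOperands run right after
SIShrinkInstructions in GCNPassConfig::addMachineSSAOptimization(), gated on
the optimization level. A minimal C++ sketch of the resulting pass ordering
follows; the surrounding passes are elided and the comments are explanatory
notes added here, not part of the patch (see the AMDGPUTargetMachine.cpp hunk
below for the exact change):

  void GCNPassConfig::addMachineSSAOptimization() {
    // ... earlier machine-SSA passes, unchanged by this patch ...
    addPass(&DeadMachineInstructionElimID);
    // Shrink eligible 64-bit VALU encodings down to their 32-bit forms.
    addPass(createSIShrinkInstructionsPass());
    // New: re-run operand folding on the shrunk code so that constants
    // materialized into an SGPR (s_movk_i32, s_brev_b32, ...) can instead
    // be folded into their users as inline literals. Skipped at -O1 to
    // limit the compile-time cost.
    if (TM->getOptLevel() > CodeGenOptLevel::Less)
      addPass(&SIFoldOperandsID);
  }

That folding is exactly what the test churn below shows: an s_movk_i32 into
an SGPR followed by uses of that SGPR collapses into single instructions
with the constant inlined as a literal, freeing the scalar register.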
---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |    2 +
 .../CodeGen/AMDGPU/GlobalISel/add.v2i16.ll    |    5 +-
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll |  161 +-
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll     |    8 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll     |   16 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll     |    4 +-
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll |  161 +-
 .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll     |   14 +-
 .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll     |    6 +-
 .../CodeGen/AMDGPU/ds-combine-large-stride.ll |   46 +-
 .../CodeGen/AMDGPU/integer-mad-patterns.ll    |    3 +-
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll      |    2 +
 .../AMDGPU/promote-constOffset-to-imm.ll      |  106 +-
 llvm/test/CodeGen/AMDGPU/sdiv64.ll            |    9 +-
 .../CodeGen/AMDGPU/spill-scavenge-offset.ll   | 1298 ++++++++---------
 llvm/test/CodeGen/AMDGPU/srem64.ll            |    9 +-
 llvm/test/CodeGen/AMDGPU/udiv64.ll            |    3 +-
 llvm/test/CodeGen/AMDGPU/urem64.ll            |    3 +-
 18 files changed, 885 insertions(+), 971 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index bcbc03eb2559c4f..a674c52667c684b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1199,6 +1199,8 @@ void GCNPassConfig::addMachineSSAOptimization() {
   }
   addPass(&DeadMachineInstructionElimID);
   addPass(createSIShrinkInstructionsPass());
+  if (TM->getOptLevel() > CodeGenOptLevel::Less)
+    addPass(&SIFoldOperandsID);
 }
 
 bool GCNPassConfig::addILPOpts() {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index 26d1fbb09210c64..e9f30e8503b310e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -165,9 +165,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
 ; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_movk_i32 s4, 0xffc0
-; GFX7-NEXT: v_add_i32_e32 v0, vcc, s4, v0
-; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v1
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v0
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, 0xffffffc0, v1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index cded5c94edf8cc3..c78d4533f4ddd3f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -231,14 +231,12 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: s_brev_b32 s5, 1
 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: s_brev_b32 s4, -2
 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT: v_max_i32_e32 v1, v5, v1
 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v4
 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
@@ -246,8 +244,8 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT: v_min_i32_e32 v4, 0, v1
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3
vcc, s4, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -512,15 +510,15 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, 0, v0 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_max_i32_e32 v1, v10, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v8 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX6-NEXT: v_min_i32_e32 v8, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v1 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s5, v8 @@ -1265,19 +1263,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v2i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v5, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_min_i32_e32 v4, 0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_max_i32_e32 v3, v4, v3 ; GFX6-NEXT: v_min_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -1286,19 +1282,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 ; GFX8-NEXT: v_min_i32_e32 v5, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s5, v5 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x80000000, v5 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_max_i32_e32 v2, v5, v2 ; GFX8-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX8-NEXT: v_min_i32_e32 v4, 0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; GFX8-NEXT: v_max_i32_e32 v2, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s5, v4 -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x80000000, v4 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 0x7fffffff, v2 ; GFX8-NEXT: v_max_i32_e32 v3, v4, v3 ; GFX8-NEXT: v_min_i32_e32 v2, v3, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 @@ -1383,26 +1377,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v3i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v7, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v6, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v7, vcc, s5, v7 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s4, v6 +; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0x80000000, v7 +; GFX6-NEXT: v_sub_i32_e32 
v6, vcc, 0x7fffffff, v6 ; GFX6-NEXT: v_max_i32_e32 v3, v7, v3 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s5, v6 +; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 ; GFX6-NEXT: v_max_i32_e32 v4, v6, v4 ; GFX6-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX6-NEXT: v_min_i32_e32 v4, 0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v2 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX6-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 @@ -1411,26 +1404,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v3i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 ; GFX8-NEXT: v_min_i32_e32 v7, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v6, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s5, v7 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s4, v6 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 0x80000000, v7 +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x7fffffff, v6 ; GFX8-NEXT: v_max_i32_e32 v3, v7, v3 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v1 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s5, v6 +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 ; GFX8-NEXT: v_max_i32_e32 v4, v6, v4 ; GFX8-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX8-NEXT: v_min_i32_e32 v4, 0, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_max_i32_e32 v3, 0, v2 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s5, v4 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x80000000, v4 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 0x7fffffff, v3 ; GFX8-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX8-NEXT: v_min_i32_e32 v3, v4, v3 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 @@ -1536,26 +1528,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v4i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v9, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v9, vcc, s5, v9 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0x80000000, v9 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_max_i32_e32 v4, v9, v4 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX6-NEXT: v_min_i32_e32 v8, 0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s5, v8 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x80000000, v8 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v5, v8, v5 ; GFX6-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v2 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, 
vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX6-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v3 @@ -1571,26 +1561,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 ; GFX8-NEXT: v_min_i32_e32 v9, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v8, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v9, vcc, s5, v9 -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s4, v8 +; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 0x80000000, v9 +; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 0x7fffffff, v8 ; GFX8-NEXT: v_max_i32_e32 v4, v9, v4 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX8-NEXT: v_min_i32_e32 v8, 0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v4 ; GFX8-NEXT: v_max_i32_e32 v4, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s5, v8 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 0x80000000, v8 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_max_i32_e32 v5, v8, v5 ; GFX8-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX8-NEXT: v_min_i32_e32 v5, 0, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4 ; GFX8-NEXT: v_max_i32_e32 v4, 0, v2 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s5, v5 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x80000000, v5 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX8-NEXT: v_min_i32_e32 v4, v5, v4 ; GFX8-NEXT: v_min_i32_e32 v5, 0, v3 @@ -1724,34 +1712,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX6-LABEL: v_saddsat_v5i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_min_i32_e32 v12, 0, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 +; GFX6-NEXT: v_min_i32_e32 v11, 0, v0 ; GFX6-NEXT: v_max_i32_e32 v10, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v12, vcc, s5, v12 -; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s4, v10 -; GFX6-NEXT: v_max_i32_e32 v5, v12, v5 +; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0x80000000, v11 +; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 0x7fffffff, v10 +; GFX6-NEXT: v_max_i32_e32 v5, v11, v5 ; GFX6-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX6-NEXT: v_min_i32_e32 v10, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10 +; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 0x80000000, v10 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s4, v5 ; GFX6-NEXT: v_max_i32_e32 v6, v10, v6 ; GFX6-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v2 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s5, v6 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s4, v5 +; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v7 ; GFX6-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v3 -; GFX6-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v3 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v11, v5 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_max_i32_e32 v6, v6, v8 ; GFX6-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX6-NEXT: v_min_i32_e32 v6, 0, v4 @@ -1767,34 +1753,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX8-LABEL: v_saddsat_v5i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_min_i32_e32 v12, 0, v0 -; GFX8-NEXT: s_brev_b32 s4, -2 +; GFX8-NEXT: v_min_i32_e32 v11, 0, v0 ; GFX8-NEXT: v_max_i32_e32 v10, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v12, vcc, s5, v12 -; GFX8-NEXT: v_sub_u32_e32 v10, vcc, s4, v10 -; GFX8-NEXT: v_max_i32_e32 v5, v12, v5 +; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 0x80000000, v11 +; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 0x7fffffff, v10 +; GFX8-NEXT: v_max_i32_e32 v5, v11, v5 ; GFX8-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX8-NEXT: v_min_i32_e32 v10, 0, v1 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_max_i32_e32 v5, 0, v1 -; GFX8-NEXT: v_sub_u32_e32 v10, vcc, s5, v10 +; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 0x80000000, v10 ; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s4, v5 ; GFX8-NEXT: v_max_i32_e32 v6, v10, v6 ; GFX8-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v2 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5 ; GFX8-NEXT: v_max_i32_e32 v5, 0, v2 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s5, v6 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s4, v5 +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_max_i32_e32 v6, v6, v7 ; GFX8-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v3 -; GFX8-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 ; GFX8-NEXT: v_max_i32_e32 v5, 0, v3 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v11, v5 +; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_max_i32_e32 v6, v6, v8 ; GFX8-NEXT: v_min_i32_e32 v5, v6, v5 ; GFX8-NEXT: v_min_i32_e32 v6, 0, v4 @@ -2766,13 +2750,11 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v5, 0, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v5, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 @@ -2780,8 +2762,8 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX6-NEXT: v_max_i32_e32 v3, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -2978,13 +2960,11 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-LABEL: saddsat_v2i16_vs: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s3, 1 ; GFX6-NEXT: v_min_i32_e32 v3, 0, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_brev_b32 s2, -2 ; GFX6-NEXT: v_max_i32_e32 v2, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s3, v3 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x80000000, v3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_max_i32_e32 v3, s0, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 
v1, 16, v1 ; GFX6-NEXT: v_min_i32_e32 v2, v3, v2 @@ -2992,8 +2972,8 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: s_lshl_b32 s0, s1, 16 ; GFX6-NEXT: v_max_i32_e32 v2, 0, v1 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s3, v3 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x80000000, v3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_max_i32_e32 v3, s0, v3 ; GFX6-NEXT: v_min_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 @@ -3059,14 +3039,14 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX6-NEXT: s_brev_b32 s5, 1 ; GFX6-NEXT: v_min_i32_e32 v10, 0, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, 0, v0 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_max_i32_e32 v4, v10, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX6-NEXT: v_min_i32_e32 v8, 0, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5 ; GFX6-NEXT: v_max_i32_e32 v5, 0, v1 @@ -4847,8 +4827,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4871,8 +4850,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x80000000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4895,8 +4873,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll index ab000d91a3ef23d..7f7788de6a7e1fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll @@ -295,7 +295,7 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x1000, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 @@ -345,7 +345,7 @@ define <2 
x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) { ; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v0 ; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7] -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s8, v1 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7] @@ -437,7 +437,7 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 @@ -486,7 +486,7 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) { ; GISEL-NEXT: v_subrev_i32_e32 v6, vcc, s8, v0 ; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v1 +; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll index 88ace1c51f5b023..6aff6200acff9f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll @@ -268,10 +268,10 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 @@ -312,13 +312,13 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) { ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 @@ -399,10 +399,10 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; 
CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 @@ -443,13 +443,13 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) { ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index d0c55c69f508775..a89f01d62afa71f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1078,7 +1078,7 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v0 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 @@ -1699,7 +1699,7 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 65455d754be4f53..345982c1c693317 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -231,23 +231,21 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v1, v4, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v5 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ 
-506,20 +504,20 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_min_i32_e32 v10, -1, v0 ; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 ; GFX6-NEXT: v_max_i32_e32 v1, v8, v1 ; GFX6-NEXT: v_min_i32_e32 v1, v1, v10 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 @@ -1265,19 +1263,17 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v2i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v5 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s4, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ -1286,19 +1282,17 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v4, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s5, v5 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5 ; GFX8-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX8-NEXT: v_min_i32_e32 v2, v2, v5 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 ; GFX8-NEXT: v_max_i32_e32 v2, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2 ; GFX8-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s5, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4 ; GFX8-NEXT: v_max_i32_e32 v2, v2, v3 ; GFX8-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v2 @@ -1383,26 +1377,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v3i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v6, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s4, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x7fffffff, v6 ; GFX6-NEXT: v_min_i32_e32 v7, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v7, 
vcc, s5, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 0x80000000, v7 ; GFX6-NEXT: v_max_i32_e32 v3, v6, v3 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v7 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 ; GFX6-NEXT: v_min_i32_e32 v6, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v4 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v3, v3, v5 ; GFX6-NEXT: v_min_i32_e32 v3, v3, v4 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 @@ -1411,26 +1404,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v3i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v6, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s4, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x7fffffff, v6 ; GFX8-NEXT: v_min_i32_e32 v7, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s5, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 0x80000000, v7 ; GFX8-NEXT: v_max_i32_e32 v3, v6, v3 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v7 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_max_i32_e32 v3, -1, v1 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 ; GFX8-NEXT: v_min_i32_e32 v6, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s5, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_max_i32_e32 v3, v3, v4 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v6 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_max_i32_e32 v3, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 0x7fffffff, v3 ; GFX8-NEXT: v_min_i32_e32 v4, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s5, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4 ; GFX8-NEXT: v_max_i32_e32 v3, v3, v5 ; GFX8-NEXT: v_min_i32_e32 v3, v3, v4 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 @@ -1536,26 +1528,24 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v4i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_min_i32_e32 v9, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v9, vcc, s5, v9 +; GFX6-NEXT: v_subrev_i32_e32 v9, vcc, 0x80000000, v9 ; GFX6-NEXT: v_max_i32_e32 v4, v8, v4 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v9 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v8, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s5, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x80000000, v8 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: 
v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v4, v4, v6 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v5 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -1571,26 +1561,24 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v8, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s4, v8 +; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x7fffffff, v8 ; GFX8-NEXT: v_min_i32_e32 v9, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, s5, v9 +; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, 0x80000000, v9 ; GFX8-NEXT: v_max_i32_e32 v4, v8, v4 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v9 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v4 ; GFX8-NEXT: v_max_i32_e32 v4, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_min_i32_e32 v8, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s5, v8 +; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x80000000, v8 ; GFX8-NEXT: v_max_i32_e32 v4, v4, v5 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v8 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v4 ; GFX8-NEXT: v_max_i32_e32 v4, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 ; GFX8-NEXT: v_min_i32_e32 v5, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s5, v5 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5 ; GFX8-NEXT: v_max_i32_e32 v4, v4, v6 ; GFX8-NEXT: v_min_i32_e32 v4, v4, v5 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v4 @@ -1724,32 +1712,30 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX6-LABEL: v_ssubsat_v5i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v10, -1, v0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s4, v10 -; GFX6-NEXT: v_min_i32_e32 v12, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, s5, v12 +; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 0x7fffffff, v10 +; GFX6-NEXT: v_min_i32_e32 v11, -1, v0 +; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 0x80000000, v11 ; GFX6-NEXT: v_max_i32_e32 v5, v10, v5 -; GFX6-NEXT: v_min_i32_e32 v5, v5, v12 +; GFX6-NEXT: v_min_i32_e32 v5, v5, v11 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 ; GFX6-NEXT: v_min_i32_e32 v10, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 +; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 0x80000000, v10 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX6-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_min_i32_e32 v6, -1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6 ; GFX6-NEXT: v_max_i32_e32 v5, v5, v7 ; GFX6-NEXT: v_min_i32_e32 v5, v5, v6 -; GFX6-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v3 -; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5 ; GFX6-NEXT: v_min_i32_e32 v6, -1, v3 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6 ; 
GFX6-NEXT: v_max_i32_e32 v5, v5, v8 @@ -1767,32 +1753,30 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { ; GFX8-LABEL: v_ssubsat_v5i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v10, -1, v0 -; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s4, v10 -; GFX8-NEXT: v_min_i32_e32 v12, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v12, vcc, s5, v12 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 0x7fffffff, v10 +; GFX8-NEXT: v_min_i32_e32 v11, -1, v0 +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 0x80000000, v11 ; GFX8-NEXT: v_max_i32_e32 v5, v10, v5 -; GFX8-NEXT: v_min_i32_e32 v5, v5, v12 +; GFX8-NEXT: v_min_i32_e32 v5, v5, v11 +; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_max_i32_e32 v5, -1, v1 ; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 ; GFX8-NEXT: v_min_i32_e32 v10, -1, v1 -; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s5, v10 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 0x80000000, v10 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v6 ; GFX8-NEXT: v_min_i32_e32 v5, v5, v10 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v5 ; GFX8-NEXT: v_max_i32_e32 v5, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_min_i32_e32 v6, -1, v2 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s5, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v7 ; GFX8-NEXT: v_min_i32_e32 v5, v5, v6 -; GFX8-NEXT: v_bfrev_b32_e32 v11, -2 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v5 ; GFX8-NEXT: v_max_i32_e32 v5, -1, v3 -; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5 ; GFX8-NEXT: v_min_i32_e32 v6, -1, v3 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6 ; GFX8-NEXT: v_max_i32_e32 v5, v5, v8 @@ -2766,22 +2750,20 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 ; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3 ; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4 ; GFX6-NEXT: v_max_i32_e32 v2, v3, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ -2978,22 +2960,20 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-LABEL: ssubsat_v2i16_vs: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s2, -2 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_brev_b32 s3, 1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 -; GFX6-NEXT: 
v_subrev_i32_e32 v3, vcc, s3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 ; GFX6-NEXT: v_max_i32_e32 v2, s0, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_max_i32_e32 v2, -1, v1 ; GFX6-NEXT: s_lshl_b32 s0, s1, 16 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 ; GFX6-NEXT: v_min_i32_e32 v3, -1, v1 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 ; GFX6-NEXT: v_max_i32_e32 v2, s0, v2 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 @@ -3056,16 +3036,16 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8 ; GFX6-NEXT: v_min_i32_e32 v10, -1, v0 ; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 ; GFX6-NEXT: v_max_i32_e32 v4, v8, v4 ; GFX6-NEXT: v_min_i32_e32 v4, v4, v10 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5 ; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 @@ -4847,8 +4827,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[6:7], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4871,8 +4850,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[6:7], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x80000000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc @@ -4895,8 +4873,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[6:7], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll index 48f05a33f03649c..3af5ac98658ddac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -222,10 +222,10 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) { ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 
0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -253,13 +253,13 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) { ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc @@ -284,13 +284,13 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) { ; CGP-NEXT: v_mul_lo_u32 v2, v2, s4 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 -; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 +; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 097f6642cbc669b..f5b27906ed67ed8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1068,7 +1068,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v0 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[6:7] ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] @@ -1295,7 +1295,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 +; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, 0x12d8fb, v2 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[8:9] ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] @@ -1530,7 +1530,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 0x12d8fb, v2 ; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7 ; CGP-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[8:9] ; CGP-NEXT: v_subbrev_u32_e64 v1, 
s[4:5], 0, v1, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll index aa1d44c31606b8f..726203c4156708b 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -5,9 +5,9 @@ ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x200, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -50,8 +50,8 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x400, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x800, [[BASE]] @@ -94,9 +94,9 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -173,9 +173,9 @@ bb: ; GCN-LABEL: ds_read32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e64 [[B1:v[0-9]+]], vcc, [[ARG]], 8 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 @@ -209,7 +209,7 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50 @@ -247,9 +247,9 @@ bb: ; GCN-LABEL: ds_read64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e64 [[B1:v[0-9]+]], vcc, [[ARG]], 8 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 @@ -283,9 +283,9 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 
[[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x200, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -319,9 +319,9 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] -; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x200, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]] @@ -380,9 +380,9 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[BASE:v[0-9]+]], vcc, 4, [[BASE]] +; VI-DAG: v_add_u32_e64 [[BASE:v[0-9]+]], vcc, [[ARG]], 4 ; GFX9-DAG: v_add_u32_e32 [[BASE:v[0-9]+]], 4, [[BASE]] ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 @@ -409,7 +409,7 @@ bb: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]] ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]] ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 @@ -438,9 +438,9 @@ bb: ; GCN-LABEL: ds_write64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 -; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GFX9: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; VI-DAG: v_add_u32_e32 [[BASE]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e64 [[BASE:v[0-9]+]], vcc, [[ARG]], 8 ; GFX9-DAG: v_add_u32_e32 [[BASE]], 8, [[BASE]] ; GCN-DAG: ds_write2st64_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll index 61017e809c86365..70a7f67f5b8d0d6 100644 --- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll @@ -3273,9 +3273,8 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v6 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v0, v2 ; GFX67-SDAG-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX67-SDAG-NEXT: s_movk_i32 s4, 0x100 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, s4, v3 +; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 0x100, v3 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v1 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index b939c8d2e339de4..25047121f31ff61 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -920,6 +920,7 @@ ; GCN-O2-NEXT: SI Fold Operands ; GCN-O2-NEXT: Remove dead machine instructions ; GCN-O2-NEXT: SI Shrink Instructions +; GCN-O2-NEXT: SI Fold Operands ; 
GCN-O2-NEXT: Register Usage Information Propagation
 ; GCN-O2-NEXT: Detect Dead Lanes
 ; GCN-O2-NEXT: Remove dead machine instructions
@@ -1238,6 +1239,7 @@
 ; GCN-O3-NEXT: SI Fold Operands
 ; GCN-O3-NEXT: Remove dead machine instructions
 ; GCN-O3-NEXT: SI Shrink Instructions
+; GCN-O3-NEXT: SI Fold Operands
 ; GCN-O3-NEXT: Register Usage Information Propagation
 ; GCN-O3-NEXT: Detect Dead Lanes
 ; GCN-O3-NEXT: Remove dead machine instructions
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a462c19ce645d4a..c2b10c160bf586a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -54,13 +54,11 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT: s_movk_i32 s0, 0x2000
 ; GFX8-NEXT: v_add_u32_e32 v13, vcc, s0, v0
 ; GFX8-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc
-; GFX8-NEXT: s_movk_i32 s0, 0x2800
-; GFX8-NEXT: v_add_u32_e32 v15, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, 0x2800, v0
 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc
 ; GFX8-NEXT: flat_load_dwordx2 v[13:14], v[13:14]
 ; GFX8-NEXT: flat_load_dwordx2 v[15:16], v[15:16]
-; GFX8-NEXT: s_movk_i32 s0, 0x3000
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0x3000, v0
 ; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
 ; GFX8-NEXT: flat_load_dwordx2 v[17:18], v[17:18]
 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3800, v0
@@ -132,8 +130,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT: global_load_dwordx2 v[12:13], v[10:11], off offset:2048
 ; GFX900-NEXT: global_load_dwordx2 v[14:15], v[6:7], off
 ; GFX900-NEXT: global_load_dwordx2 v[16:17], v[6:7], off offset:2048
-; GFX900-NEXT: s_movk_i32 s0, 0x3000
-; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
 ; GFX900-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off
 ; GFX900-NEXT: global_load_dwordx2 v[10:11], v[0:1], off offset:2048
@@ -276,8 +273,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT: global_load_dwordx2 v[12:13], v[10:11], off offset:2048
 ; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[6:7], off
 ; GFX90A-NEXT: global_load_dwordx2 v[16:17], v[6:7], off offset:2048
-; GFX90A-NEXT: s_movk_i32 s0, 0x3000
-; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[6:7], v[0:1], off
 ; GFX90A-NEXT: global_load_dwordx2 v[10:11], v[0:1], off offset:2048
@@ -573,21 +569,17 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT: v_mov_b32_e32 v3, s35
 ; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1
 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v3, vcc
-; GFX900-NEXT: s_movk_i32 s0, 0x5000
-; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s0, v1
+; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, 0x5000, v1
 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
 ; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GFX900-NEXT: s_movk_i32 s2, 0x7f
 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: s_movk_i32 s0, 0xd000
-; GFX900-NEXT: s_movk_i32 s1, 0xe000
-; GFX900-NEXT: s_movk_i32 s3, 0xf000
+; GFX900-NEXT: s_movk_i32 s0, 0x7f
 ; GFX900-NEXT: .LBB1_1: ; %for.cond.preheader
 ; GFX900-NEXT: ; =>This Loop Header: Depth=1
 ; GFX900-NEXT: ; Child Loop BB1_2 Depth 2
 ; GFX900-NEXT: v_mov_b32_e32 v6, v2
 ; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: s_mov_b32 s4, 0
+; GFX900-NEXT: s_mov_b32 s1, 0
 ; GFX900-NEXT: .LBB1_2: ; %for.body
 ; GFX900-NEXT: ; Parent Loop BB1_1 Depth=1
 ; GFX900-NEXT: ; => This Inner Loop Header: Depth=2
@@ -599,23 +591,23 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT: global_load_dwordx2 v[7:8], v[7:8], off
 ; GFX900-NEXT: v_addc_co_u32_e32 v14, vcc, -1, v6, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[17:18], v[13:14], off offset:-2048
-; GFX900-NEXT: v_add_co_u32_e32 v15, vcc, s0, v5
+; GFX900-NEXT: v_add_co_u32_e32 v15, vcc, 0xffffd000, v5
 ; GFX900-NEXT: v_addc_co_u32_e32 v16, vcc, -1, v6, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[15:16], v[15:16], off offset:-2048
-; GFX900-NEXT: v_add_co_u32_e32 v19, vcc, s1, v5
+; GFX900-NEXT: v_add_co_u32_e32 v19, vcc, 0xffffe000, v5
 ; GFX900-NEXT: global_load_dwordx2 v[13:14], v[13:14], off
 ; GFX900-NEXT: v_addc_co_u32_e32 v20, vcc, -1, v6, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[23:24], v[19:20], off offset:-4096
 ; GFX900-NEXT: global_load_dwordx2 v[25:26], v[19:20], off offset:-2048
 ; GFX900-NEXT: global_load_dwordx2 v[27:28], v[19:20], off
-; GFX900-NEXT: v_add_co_u32_e32 v21, vcc, s3, v5
+; GFX900-NEXT: v_add_co_u32_e32 v21, vcc, 0xfffff000, v5
 ; GFX900-NEXT: v_addc_co_u32_e32 v22, vcc, -1, v6, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[19:20], v[21:22], off offset:-2048
 ; GFX900-NEXT: global_load_dwordx2 v[29:30], v[5:6], off
 ; GFX900-NEXT: v_add_co_u32_e32 v5, vcc, 0x10000, v5
 ; GFX900-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
-; GFX900-NEXT: s_addk_i32 s4, 0x2000
-; GFX900-NEXT: s_cmp_gt_u32 s4, 0x3fffff
+; GFX900-NEXT: s_addk_i32 s1, 0x2000
+; GFX900-NEXT: s_cmp_gt_u32 s1, 0x3fffff
 ; GFX900-NEXT: s_waitcnt vmcnt(8)
 ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, v7, v3
 ; GFX900-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v4, vcc
@@ -649,11 +641,11 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT: s_cbranch_scc0 .LBB1_2
 ; GFX900-NEXT: ; %bb.3: ; %while.cond.loopexit
 ; GFX900-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; GFX900-NEXT: s_add_i32 s4, s2, -1
-; GFX900-NEXT: s_cmp_eq_u32 s2, 0
+; GFX900-NEXT: s_add_i32 s1, s0, -1
+; GFX900-NEXT: s_cmp_eq_u32 s0, 0
 ; GFX900-NEXT: s_cbranch_scc1 .LBB1_5
 ; GFX900-NEXT: ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX900-NEXT: s_mov_b32 s2, s4
+; GFX900-NEXT: s_mov_b32 s0, s1
 ; GFX900-NEXT: s_branch .LBB1_1
 ; GFX900-NEXT: .LBB1_5: ; %while.end
 ; GFX900-NEXT: v_mov_b32_e32 v1, s35
@@ -805,19 +797,15 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT: v_mov_b32_e32 v2, s35
 ; GFX90A-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1
 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v2, vcc
-; GFX90A-NEXT: s_movk_i32 s0, 0x5000
-; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v1
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x5000, v1
 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX90A-NEXT: s_movk_i32 s2, 0x7f
+; GFX90A-NEXT: s_movk_i32 s0, 0x7f
 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0
-; GFX90A-NEXT: s_movk_i32 s0, 0xd000
-; GFX90A-NEXT: s_movk_i32 s1, 0xe000
-; GFX90A-NEXT: s_movk_i32 s3, 0xf000
 ; GFX90A-NEXT: .LBB1_1: ; %for.cond.preheader
 ; GFX90A-NEXT: ; =>This Loop Header: Depth=1
 ; GFX90A-NEXT: ; Child Loop BB1_2 Depth 2
 ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[0,1]
-; GFX90A-NEXT: s_mov_b32 s4, 0
+; GFX90A-NEXT: s_mov_b32 s1, 0
 ; GFX90A-NEXT: .LBB1_2: ; %for.body
 ; GFX90A-NEXT: ; Parent Loop BB1_1 Depth=1
 ; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
@@ -829,23 +817,23 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT: global_load_dwordx2 v[12:13], v[12:13], off
 ; GFX90A-NEXT: v_addc_co_u32_e32 v15, vcc, -1, v7, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[18:19], v[14:15], off offset:-2048
-; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, s0, v6
+; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, 0xffffd000, v6
 ; GFX90A-NEXT: v_addc_co_u32_e32 v17, vcc, -1, v7, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[16:17], v[16:17], off offset:-2048
-; GFX90A-NEXT: v_add_co_u32_e32 v20, vcc, s1, v6
+; GFX90A-NEXT: v_add_co_u32_e32 v20, vcc, 0xffffe000, v6
 ; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[14:15], off
 ; GFX90A-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v7, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[24:25], v[20:21], off offset:-4096
 ; GFX90A-NEXT: global_load_dwordx2 v[26:27], v[20:21], off offset:-2048
 ; GFX90A-NEXT: global_load_dwordx2 v[28:29], v[20:21], off
-; GFX90A-NEXT: v_add_co_u32_e32 v22, vcc, s3, v6
+; GFX90A-NEXT: v_add_co_u32_e32 v22, vcc, 0xfffff000, v6
 ; GFX90A-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v7, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[20:21], v[22:23], off offset:-2048
 ; GFX90A-NEXT: global_load_dwordx2 v[30:31], v[6:7], off
 ; GFX90A-NEXT: v_add_co_u32_e32 v6, vcc, 0x10000, v6
 ; GFX90A-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc
-; GFX90A-NEXT: s_addk_i32 s4, 0x2000
-; GFX90A-NEXT: s_cmp_gt_u32 s4, 0x3fffff
+; GFX90A-NEXT: s_addk_i32 s1, 0x2000
+; GFX90A-NEXT: s_cmp_gt_u32 s1, 0x3fffff
 ; GFX90A-NEXT: s_waitcnt vmcnt(8)
 ; GFX90A-NEXT: v_add_co_u32_e32 v1, vcc, v12, v4
 ; GFX90A-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc
@@ -879,11 +867,11 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT: s_cbranch_scc0 .LBB1_2
 ; GFX90A-NEXT: ; %bb.3: ; %while.cond.loopexit
 ; GFX90A-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; GFX90A-NEXT: s_add_i32 s4, s2, -1
-; GFX90A-NEXT: s_cmp_eq_u32 s2, 0
+; GFX90A-NEXT: s_add_i32 s1, s0, -1
+; GFX90A-NEXT: s_cmp_eq_u32 s0, 0
 ; GFX90A-NEXT: s_cbranch_scc1 .LBB1_5
 ; GFX90A-NEXT: ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX90A-NEXT: s_mov_b32 s2, s4
+; GFX90A-NEXT: s_mov_b32 s0, s1
 ; GFX90A-NEXT: s_branch .LBB1_1
 ; GFX90A-NEXT: .LBB1_5: ; %while.end
 ; GFX90A-NEXT: v_mov_b32_e32 v1, s35
@@ -1163,10 +1151,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT: s_movk_i32 s0, 0x1800
 ; GFX8-NEXT: v_add_u32_e32 v15, vcc, s0, v0
 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc
-; GFX8-NEXT: s_movk_i32 s0, 0x1c00
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0x1c00, v0
 ; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
-; GFX8-NEXT: s_movk_i32 s0, 0x2000
 ; GFX8-NEXT: flat_load_dword v2, v[0:1]
 ; GFX8-NEXT: flat_load_dword v19, v[5:6]
 ; GFX8-NEXT: flat_load_dword v7, v[7:8]
@@ -1175,7 +1161,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT: flat_load_dword v10, v[13:14]
 ; GFX8-NEXT: flat_load_dword v11, v[15:16]
 ; GFX8-NEXT: flat_load_dword v12, v[17:18]
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x2000, v0
 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc
 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x2400, v0
 ; GFX8-NEXT: flat_load_dword v5, v[5:6]
@@ -1230,10 +1216,9 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, s34, v4
 ; GFX900-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v0, vcc
 ; GFX900-NEXT: v_lshlrev_b64 v[0:1], 2, v[1:2]
-; GFX900-NEXT: s_movk_i32 s0, 0x1000
 ; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0
 ; GFX900-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc
-; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, s0, v0
+; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v0
 ; GFX900-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
 ; GFX900-NEXT: global_load_dword v5, v[0:1], off
 ; GFX900-NEXT: global_load_dword v6, v[0:1], off offset:1024
@@ -1357,8 +1342,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3]
 ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, v5, v0
 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v1, vcc
-; GFX90A-NEXT: s_movk_i32 s0, 0x1000
-; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v0
 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
 ; GFX90A-NEXT: global_load_dword v5, v[0:1], off
 ; GFX90A-NEXT: global_load_dword v6, v[0:1], off offset:1024
@@ -1526,10 +1510,9 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT: s_movk_i32 s0, 0xf000
 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, s0, v0
 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc
-; GFX8-NEXT: s_movk_i32 s0, 0xf800
 ; GFX8-NEXT: flat_load_dwordx2 v[7:8], v[0:1]
 ; GFX8-NEXT: flat_load_dwordx2 v[5:6], v[5:6]
-; GFX8-NEXT: v_add_u32_e32 v9, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xfffff800, v0
 ; GFX8-NEXT: v_addc_u32_e32 v10, vcc, 0, v1, vcc
 ; GFX8-NEXT: flat_load_dwordx2 v[9:10], v[9:10]
 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0
@@ -1804,11 +1787,9 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
-; GFX8-NEXT: s_mov_b32 s0, 0x7ffff800
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7ffff800, v0
 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v1, vcc
-; GFX8-NEXT: s_mov_b32 s0, 0x7ffffc00
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x7ffffc00, v0
 ; GFX8-NEXT: v_addc_u32_e32 v8, vcc, 0, v1, vcc
 ; GFX8-NEXT: flat_load_dword v2, v[0:1]
 ; GFX8-NEXT: flat_load_dword v5, v[5:6]
@@ -2348,13 +2329,11 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT: s_movk_i32 s0, 0x2000
 ; GFX8-NEXT: v_add_u32_e32 v13, vcc, s0, v0
 ; GFX8-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc
-; GFX8-NEXT: s_movk_i32 s0, 0x1800
-; GFX8-NEXT: v_add_u32_e32 v15, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, 0x1800, v0
 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc
 ; GFX8-NEXT: flat_load_dwordx2 v[13:14], v[13:14]
 ; GFX8-NEXT: flat_load_dwordx2 v[15:16], v[15:16]
-; GFX8-NEXT: s_movk_i32 s0, 0x1000
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, s0, v0
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0x1000, v0
 ; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
 ; GFX8-NEXT: flat_load_dwordx2 v[17:18], v[17:18]
 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x800, v0
@@ -2424,8 +2403,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT: v_add_co_u32_e32 v4, vcc, s0, v0
 ; GFX900-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[10:11], v[4:5], off offset:2048
-; GFX900-NEXT: s_movk_i32 s0, 0x1000
-; GFX900-NEXT: v_add_co_u32_e32 v12, vcc, s0, v0
+; GFX900-NEXT: v_add_co_u32_e32 v12, vcc, 0x1000, v0
 ; GFX900-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
 ; GFX900-NEXT: global_load_dwordx2 v[14:15], v[12:13], off
 ; GFX900-NEXT: global_load_dwordx2 v[16:17], v[4:5], off
@@ -2571,8 +2549,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, s0, v0
 ; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[10:11], v[4:5], off offset:2048
-; GFX90A-NEXT: s_movk_i32 s0, 0x1000
-; GFX90A-NEXT: v_add_co_u32_e32 v12, vcc, s0, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v12, vcc, 0x1000, v0
 ; GFX90A-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
 ; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[12:13], off
 ; GFX90A-NEXT: global_load_dwordx2 v[16:17], v[4:5], off
@@ -2743,8 +2720,7 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v0
 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v4, v1, vcc
-; GFX8-NEXT: s_movk_i32 s0, 0x800
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x800, v2
 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, -1, v6, vcc
 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0, v2
 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, -1, v6, vcc
@@ -2784,10 +2760,9 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
 ; GFX900-NEXT: v_add_co_u32_e32 v3, vcc, s34, v8
 ; GFX900-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v0, vcc
 ; GFX900-NEXT: v_lshlrev_b64 v[0:1], 3, v[1:2]
-; GFX900-NEXT: s_movk_i32 s0, 0x1000
 ; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, v3, v0
 ; GFX900-NEXT: v_addc_co_u32_e32 v3, vcc, v4, v1, vcc
-; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2
+; GFX900-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2
 ; GFX900-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v3, vcc
 ; GFX900-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
 ; GFX900-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
@@ -2871,8 +2846,7 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
 ; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 3, v[2:3]
 ; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, v4, v0
 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v1, vcc
-; GFX90A-NEXT: s_movk_i32 s0, 0x1000
-; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2
+; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2
 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v3, vcc
 ; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
 ; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 705a2af73959065..7361d9d9f795e39 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1394,8 +1394,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 32, v4
 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1
 ; GCN-IR-NEXT: v_min_u32_e32 v8, v4, v5
-; GCN-IR-NEXT: s_movk_i32 s6, 0xffc5
-; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, s6, v8
+; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, 0xffffffc5, v8
 ; GCN-IR-NEXT: v_addc_u32_e64 v6, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[5:6]
@@ -1587,8 +1586,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 32, v4
 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1
 ; GCN-IR-NEXT: v_min_u32_e32 v8, v4, v5
-; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0
-; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, s6, v8
+; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, 0xffffffd0, v8
 ; GCN-IR-NEXT: v_addc_u32_e64 v6, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[5:6]
@@ -1722,13 +1720,12 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
 ; GCN-IR-NEXT: v_mov_b32_e32 v12, 0
 ; GCN-IR-NEXT: v_mov_b32_e32 v6, 0
-; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while
 ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT: v_lshl_b64 v[9:10], v[9:10], 1
 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v0, 31, v4
 ; GCN-IR-NEXT: v_or_b32_e32 v0, v9, v0
-; GCN-IR-NEXT: v_sub_i32_e32 v5, vcc, s12, v0
+; GCN-IR-NEXT: v_sub_i32_e32 v5, vcc, 0x7fff, v0
 ; GCN-IR-NEXT: v_subb_u32_e32 v5, vcc, 0, v10, vcc
 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v7
 ; GCN-IR-NEXT: v_lshl_b64 v[3:4], v[3:4], 1
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 08db1e7fee259d6..dd4eb0ae2a09ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -5000,22 +5000,14 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0
 ; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0
-; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x80
+; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4
 ; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, s2, v5
 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s3
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x80, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
-; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x84
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x104
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x184
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x204
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x284
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x304
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x384
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
@@ -5043,1268 +5035,1268 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v2
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x74
+; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x100
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
-; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x84
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x94
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x94
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xa4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xb4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xc4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xd4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xe4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xf4
-; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x180
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v2
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x104
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x114
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x114
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x124
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x124
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x134
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x134
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x144
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x144
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x154
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x154
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x164
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x164
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x174
-; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x200
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x200, v2
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x174
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x184
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x194
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x194
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1f4
-; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x280
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x280, v2
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1f4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x204
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x214
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x214
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x224
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x224
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x234
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x234
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x244
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x244
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x254
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x254
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x264
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x264
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x274
-; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x300
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x300, v2
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x274
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x284
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x294
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x294
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2f4
-; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x380
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x380, v2
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2f4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x304
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x314
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x314
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x324
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x324
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x334
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x334
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x344
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x344
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x354
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x354
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x364
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x364
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x374
-; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x400
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v2
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x400, v2
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x374
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x384
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x394
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x394
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3]
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x404
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x404
 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s1
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x414
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x414
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x424
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x424
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x434
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x434
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x444
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x444
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x454
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x454
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x464
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x464
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x474
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x474
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x484
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x484
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x494
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x494
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x504
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x504
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x514
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x514
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x524
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x524
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x534
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x534
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x544
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x544
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x554
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x554
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x564
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x564
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x574
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x574
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x584
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x584
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x594
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x594
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x604
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x604
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x614
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x614
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x624
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x624
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x634
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x634
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x644
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x644
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x654
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x654
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x664
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x664
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x674
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x674
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x684
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x684
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x694
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x694
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x704
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x704
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x714
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x714
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x724
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x724
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x734
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x734
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x744
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x744
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x754
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x754
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x764
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x764
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x774
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x774
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x784
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x784
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x794
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x794
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x804
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x804
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x814
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x814
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x824
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x824
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x834
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x834
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x844
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x844
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x854
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x854
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x864
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x864
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x874
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x874
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x884
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x884
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x894
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x894
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8e4
 ; GFX9-FLATSCR-NEXT:
s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x904 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x904 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x914 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x914 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x924 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x924 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x934 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x934 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x944 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x944 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x954 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x954 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x964 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x964 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x974 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x974 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 -; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x984 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x994 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x994 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, 
v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaa4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xaa4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xab4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xab4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: 
scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xac4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xac4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xad4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xad4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xae4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xae4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xaf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb54 
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xba4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xba4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbe4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; 
GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 
16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xca4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xce4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 
0xd24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xda4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 
v[0:3], v5, s[2:3] offset:2496 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xde4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xea4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xeb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xec4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xed4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xee4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xef4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 ; GFX9-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960 -; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfa4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfe4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xff4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1004 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1014 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1024 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 
off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1034 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1044 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1054 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1054 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1064 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1074 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1074 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1084 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1084 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1094 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1094 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1104 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1104 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1114 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1114 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1124 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1124 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1134 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1134 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1144 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1144 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1154 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1154 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424 -; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1164 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1164 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1174 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1174 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1184 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1184 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1194 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1194 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: 
scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1204 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1204 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1214 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1214 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1224 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1224 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1234 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1234 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1244 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1244 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1254 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1254 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1264 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1264 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1274 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1274 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1284 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1284 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1294 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1294 ; GFX9-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1304 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1304 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1314 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1314 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1324 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1324 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, 
s[2:3] offset:3888 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1334 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1334 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1344 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1344 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1354 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1354 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1364 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1364 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1374 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1374 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1384 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1384 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1394 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1394 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080 ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 ; GFX9-FLATSCR-NEXT: ;;#ASMSTART @@ -7346,7 +7338,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3f4 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x400, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7380,7 +7372,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x380, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x364 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7414,7 +7406,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x300, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7448,7 +7440,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x280, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x264 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7482,7 +7474,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4 +; GFX9-FLATSCR-NEXT: 
v_add_co_u32_e32 v0, vcc, 0x200, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7516,7 +7508,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x164 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -7550,7 +7542,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v4 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v4 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 613349f32e2d5ad..f8898ddf9290bcd 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -1583,8 +1583,7 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 ; GCN-IR-NEXT: v_min_u32_e32 v6, v2, v3 -; GCN-IR-NEXT: s_movk_i32 s6, 0xffc5 -; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, s6, v6 +; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc5, v6 ; GCN-IR-NEXT: v_addc_u32_e64 v4, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] @@ -1774,8 +1773,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 ; GCN-IR-NEXT: v_min_u32_e32 v6, v2, v3 -; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0 -; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, s6, v6 +; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, 0xffffffd0, v6 ; GCN-IR-NEXT: v_addc_u32_e64 v4, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] @@ -1914,13 +1912,12 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: v_mov_b32_e32 v13, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff ; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 ; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, s12, v10 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 0x7fff, v10 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 ; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc ; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v8 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index c5ab44e31c0320d..ea938d5a121a24c 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -1283,13 +1283,12 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: 
s_movk_i32 s12, 0x7fff ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[7:8], v[7:8], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 ; GCN-IR-NEXT: v_or_b32_e32 v6, v7, v4 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v6 +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, 0x7fff, v6 ; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v8, vcc ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 1, v0 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 894c96acbbcd6b1..c1119e0a9cc3336 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -1307,13 +1307,12 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff ; GCN-IR-NEXT: .LBB9_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 ; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v8 +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, 0x7fff, v8 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 From lldb-commits at lists.llvm.org Tue Oct 3 05:11:36 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 05:11:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMP] Improve omp offload profiler (PR #68016) In-Reply-To: Message-ID: <651c04f8.170a0220.4616f.22f0@mx.google.com> https://github.com/fel-cab updated https://github.com/llvm/llvm-project/pull/68016 >From dd44de067c26ba94b6561c5ed7fa4a5d812a3d1a Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Mon, 18 Sep 2023 12:07:12 +0000 Subject: [PATCH 01/11] testing Profiler features --- openmp/libomptarget/src/interface.cpp | 5 ++++- openmp/libomptarget/src/private.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 5f21b16b3fbfb1e..f64e1e268a3952e 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -252,7 +252,10 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - TIMESCOPE_WITH_IDENT(Loc); + //TIMESCOPE_WITH_IDENT(Loc); + TIMESCOPE(); + //TIMESCOPE_WITH_NAME_AND_IDENT("Hello", Loc); + //TIMESCOPE_WITH_RTM_AND_IDENT("Hello", Loc); DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index cbce15b63a3eba2..dc6cd3944233955 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -433,7 +433,8 @@ class ExponentialBackoff { SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) >From 92586bca6364100c7511ad38a30f41b0f86dea9c Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 19 Sep 2023 12:02:53 +0000 Subject: 
[PATCH 02/11] Improve Profiler 1 --- llvm/lib/Support/TimeProfiler.cpp | 2 +- openmp/libomptarget/src/interface.cpp | 17 +++++++++-------- openmp/libomptarget/src/omptarget.cpp | 10 +++++----- openmp/libomptarget/src/private.h | 5 +++-- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 4d625b3eb5b1709..e1458116f64ab47 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -227,7 +227,7 @@ struct llvm::TimeTraceProfiler { J.attribute("ph", "X"); J.attribute("ts", 0); J.attribute("dur", DurUs); - J.attribute("name", "Total " + Total.first); + J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); J.attribute("avg ms", int64_t(DurUs / Count / 1000)); diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index f64e1e268a3952e..b8892cbe689107f 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -33,14 +33,14 @@ using namespace llvm::omp::target::ompt; //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { - TIMESCOPE(); + //TIMESCOPE(); PM->RTLs.registerRequires(Flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { - TIMESCOPE(); + //TIMESCOPE(); if (PM->maybeDelayRegisterLib(Desc)) return; @@ -61,7 +61,7 @@ EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { - TIMESCOPE(); + //TIMESCOPE(); PM->RTLs.unregisterLib(Desc); for (auto &RTL : PM->RTLs.UsedRTLs) { if (RTL->unregister_lib) { @@ -82,7 +82,8 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, static_assert(std::is_convertible_v, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); - TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); + //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); + TIMESCOPE_WITH_RTM_AND_IDENT("targetData", Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -253,9 +254,9 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); //TIMESCOPE_WITH_IDENT(Loc); - TIMESCOPE(); + //TIMESCOPE(); //TIMESCOPE_WITH_NAME_AND_IDENT("Hello", Loc); - //TIMESCOPE_WITH_RTM_AND_IDENT("Hello", Loc); + //TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", @@ -411,7 +412,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, // Get the current number of components for a user-defined mapper. 
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
-  TIMESCOPE();
+  //TIMESCOPE();
   auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
   int64_t Size = MapperComponentsPtr->Components.size();
   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
@@ -423,7 +424,7 @@ EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
 EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
                                         void *Begin, int64_t Size,
                                         int64_t Type, void *Name) {
-  TIMESCOPE();
+  //TIMESCOPE();
   DP("__tgt_push_mapper_component(Handle=" DPxMOD
      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
      ", Type=0x%" PRIx64 ", Name=%s).\n",
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 40419e448942608..3754f63909dac9c 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -398,7 +398,7 @@ static int32_t getParentIndex(int64_t Type) {
 
 void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                           const char *Name) {
-  TIMESCOPE();
+  //TIMESCOPE();
   DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size);
 
   if (Size <= 0) {
@@ -427,7 +427,7 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
 
 void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
                         const char *Name) {
-  TIMESCOPE();
+  //TIMESCOPE();
   DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum,
      DPxPTR(DevicePtr));
 
@@ -453,7 +453,7 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
 
 void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
                          const char *Name) {
-  TIMESCOPE();
+  //TIMESCOPE();
   DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size);
 
   if (Size <= 0) {
@@ -493,7 +493,7 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
 }
 
 void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) {
-  TIMESCOPE();
+  //TIMESCOPE();
   DP("Call to %s for device %d unlocking\n", Name, DeviceNum);
 
   DeviceTy *DevicePtr = nullptr;
@@ -572,7 +572,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                     int64_t *ArgTypes, map_var_info_t *ArgNames,
                     void **ArgMappers, AsyncInfoTy &AsyncInfo,
                     bool FromMapper) {
-  TIMESCOPE_WITH_IDENT(Loc);
+  //TIMESCOPE_WITH_IDENT(Loc);
   // process each input.
   for (int32_t I = 0; I < ArgNum; ++I) {
     // Ignore private variables and arrays - there is no mapping for them.
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index dc6cd3944233955..b1ada09d64c7a55 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -433,8 +433,9 @@ class ExponentialBackoff {
   SourceInfo SI(IDENT); \
   std::string ProfileLocation = SI.getProfileLocation(); \
   std::string RTM = RegionTypeMsg; \
-  llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM)
-  //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM)
+  llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM)
+  //llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM)
+
 #else
 #define TIMESCOPE()
 #define TIMESCOPE_WITH_IDENT(IDENT)

>From f9167dc8fef277ac1aa53e2e95bade3f0b727df1 Mon Sep 17 00:00:00 2001
From: Felipe Cabarcas
Date: Tue, 19 Sep 2023 21:33:24 +0000
Subject: [PATCH 03/11] Changed profiling to work in nanoseconds. Made
 profiling calls for runtime calls and different ones for kernel launches and
 memory transfers.
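The heart of the change is casting each time point to nanosecond precision
before subtracting, instead of casting the finished duration. A minimal
sketch of the pattern (the Entry struct and durNs name are illustrative
stand-ins for TimeTraceProfilerEntry, and steady_clock stands in for the
profiler's ClockType):

  #include <chrono>

  using std::chrono::nanoseconds;
  using std::chrono::steady_clock;
  using std::chrono::time_point_cast;

  struct Entry {
    steady_clock::time_point Start, End;
    // Casting the time points (not the duration) to nanoseconds keeps
    // truncation from making an inner scope appear to outlive its parent.
    steady_clock::rep durNs() const {
      return (time_point_cast<nanoseconds>(End) -
              time_point_cast<nanoseconds>(Start))
          .count();
    }
  };

The JSON fields "ts" and "dur" stay in microseconds; the patch divides the
nanosecond values by 1000 when the events are written out.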
--- llvm/lib/Support/TimeProfiler.cpp | 28 +++++++++++++-------------- openmp/libomptarget/src/interface.cpp | 7 ++----- openmp/libomptarget/src/omptarget.cpp | 11 +++++++---- openmp/libomptarget/src/private.h | 6 +++--- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index e1458116f64ab47..64b3ef35be27c42 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -30,7 +30,7 @@ namespace { using std::chrono::duration; using std::chrono::duration_cast; -using std::chrono::microseconds; +using std::chrono::nanoseconds; using std::chrono::steady_clock; using std::chrono::system_clock; using std::chrono::time_point; @@ -80,14 +80,14 @@ struct TimeTraceProfilerEntry { // rather than casting duration. This avoids truncation issues causing inner // scopes overruning outer scopes. ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { - return (time_point_cast(Start) - - time_point_cast(StartTime)) + return (time_point_cast(Start) - + time_point_cast(StartTime)) .count(); } ClockType::rep getFlameGraphDurUs() const { - return (time_point_cast(End) - - time_point_cast(Start)) + return (time_point_cast(End) - + time_point_cast(Start)) .count(); } }; @@ -123,7 +123,7 @@ struct llvm::TimeTraceProfiler { DurationType Duration = E.End - E.Start; // Only include sections longer or equal to TimeTraceGranularity msec. - if (duration_cast(Duration).count() >= TimeTraceGranularity) + if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -169,8 +169,8 @@ struct llvm::TimeTraceProfiler { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); - J.attribute("ts", StartUs); - J.attribute("dur", DurUs); + J.attribute("ts", StartUs / 1000); + J.attribute("dur", DurUs / 1000); J.attribute("name", E.Name); if (!E.Detail.empty()) { J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); @@ -218,7 +218,7 @@ struct llvm::TimeTraceProfiler { // Report totals on separate threads of tracing file. uint64_t TotalTid = MaxTid + 1; for (const NameAndCountAndDurationType &Total : SortedTotals) { - auto DurUs = duration_cast(Total.second.second).count(); + auto DurUs = duration_cast(Total.second.second).count(); auto Count = AllCountAndTotalPerName[Total.first].first; J.object([&] { @@ -226,11 +226,11 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurUs); + J.attribute("dur", DurUs / 1000); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); - J.attribute("avg ms", int64_t(DurUs / Count / 1000)); + J.attribute("avg ms", int64_t(DurUs / Count / 1000 / 1000)); }); }); @@ -262,9 +262,9 @@ struct llvm::TimeTraceProfiler { // This can be used to combine the profiling data from // multiple processes and preserve actual time intervals. 
J.attribute("beginningOfTime", - time_point_cast(BeginningOfTime) + time_point_cast(BeginningOfTime) .time_since_epoch() - .count()); + .count()/1000); J.objectEnd(); } @@ -281,7 +281,7 @@ struct llvm::TimeTraceProfiler { SmallString<0> ThreadName; const uint64_t Tid; - // Minimum time granularity (in microseconds) + // Minimum time granularity (in nanoseconds) const unsigned TimeTraceGranularity; }; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index b8892cbe689107f..d4ee246f84449f1 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -83,7 +83,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); - TIMESCOPE_WITH_RTM_AND_IDENT("targetData", Loc); + TIMESCOPE_WITH_RTM_AND_IDENT("Runtime Data Copy", Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -253,10 +253,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - //TIMESCOPE_WITH_IDENT(Loc); - //TIMESCOPE(); - //TIMESCOPE_WITH_NAME_AND_IDENT("Hello", Loc); - //TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); + TIMESCOPE_WITH_NAME_AND_IDENT("Runtime target exe",Loc); DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 3754f63909dac9c..ad966e7e1c47544 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -863,6 +863,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { + //TIMESCOPE_WITH_NAME_AND_IDENT("targetDataEnd", Loc); int Ret = OFFLOAD_SUCCESS; auto *PostProcessingPtrs = new SmallVector(); // process each input. @@ -955,7 +956,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - + TIMESCOPE_WITH_NAME_AND_IDENT("DevToHost", Loc); // Wait for any previous transfer if an event is present. 
if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1445,7 +1446,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, SmallVector &TgtOffsets, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); + //TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); @@ -1493,6 +1494,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, DPxPTR(HstPtrVal)); continue; } + TIMESCOPE_WITH_RTM_AND_IDENT("HostToDev", Loc); DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, @@ -1572,7 +1574,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, map_var_info_t *ArgNames, void **ArgMappers, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); + //TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; // Move data from device. @@ -1597,6 +1599,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, return Ret; }); + return OFFLOAD_SUCCESS; } } // namespace @@ -1672,7 +1675,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc); + TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index b1ada09d64c7a55..f0591cd17b0fd15 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -432,10 +432,10 @@ class ExponentialBackoff { #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ + std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(ProfileLocation, ProfileLocation + RTM) - + llvm::TimeTraceScope TimeScope(ProfileName, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) >From c82ce52f244d218752fea2dcc1f347fc589cd016 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Thu, 21 Sep 2023 14:22:28 +0000 Subject: [PATCH 04/11] test with DevToHost --- openmp/libomptarget/src/omptarget.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index ad966e7e1c47544..e113942375ef9c6 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -956,7 +956,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_NAME_AND_IDENT("DevToHost", Loc); + std::string MessageDataSize = 
"DevToHost "+std::to_string(DataSize)+"B"; + TIMESCOPE_WITH_NAME_AND_IDENT(MessageDataSize, Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { >From 448f0e77b6c824de73cbd9ae34d4c59b02e7e441 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Fri, 22 Sep 2023 21:48:57 +0000 Subject: [PATCH 05/11] Fixing nanoseconds in totals, adding syncronize timings, and adding extra info in kernels and device --- llvm/lib/Support/TimeProfiler.cpp | 24 ++++++++++++------------ openmp/libomptarget/src/interface.cpp | 18 ++++++++++-------- openmp/libomptarget/src/omptarget.cpp | 19 +++++++++---------- openmp/libomptarget/src/private.h | 10 +++++++--- 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 64b3ef35be27c42..4446583102a8133 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -79,13 +79,13 @@ struct TimeTraceProfilerEntry { // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. This avoids truncation issues causing inner // scopes overruning outer scopes. - ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { + ClockType::rep getFlameGraphStartNs(TimePointType StartTime) const { return (time_point_cast(Start) - time_point_cast(StartTime)) .count(); } - ClockType::rep getFlameGraphDurUs() const { + ClockType::rep getFlameGraphDurNs() const { return (time_point_cast(End) - time_point_cast(Start)) .count(); @@ -114,9 +114,9 @@ struct llvm::TimeTraceProfiler { // Check that end times monotonically increase. assert((Entries.empty() || - (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= - Entries.back().getFlameGraphStartUs(StartTime) + - Entries.back().getFlameGraphDurUs())) && + (E.getFlameGraphStartNs(StartTime) + E.getFlameGraphDurNs() >= + Entries.back().getFlameGraphStartNs(StartTime) + + Entries.back().getFlameGraphDurNs())) && "TimeProfiler scope ended earlier than previous scope"); // Calculate duration at full precision for overall counts. @@ -162,15 +162,15 @@ struct llvm::TimeTraceProfiler { // Emit all events for the main flame graph. auto writeEvent = [&](const auto &E, uint64_t Tid) { - auto StartUs = E.getFlameGraphStartUs(StartTime); - auto DurUs = E.getFlameGraphDurUs(); + auto StartNs = E.getFlameGraphStartNs(StartTime); + auto DurNs = E.getFlameGraphDurNs(); J.object([&] { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); - J.attribute("ts", StartUs / 1000); - J.attribute("dur", DurUs / 1000); + J.attribute("ts", StartNs / 1000); + J.attribute("dur", DurNs / 1000); J.attribute("name", E.Name); if (!E.Detail.empty()) { J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); @@ -218,7 +218,7 @@ struct llvm::TimeTraceProfiler { // Report totals on separate threads of tracing file. 
uint64_t TotalTid = MaxTid + 1; for (const NameAndCountAndDurationType &Total : SortedTotals) { - auto DurUs = duration_cast(Total.second.second).count(); + auto DurNs = duration_cast(Total.second.second).count(); auto Count = AllCountAndTotalPerName[Total.first].first; J.object([&] { @@ -226,11 +226,11 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurUs / 1000); + J.attribute("dur", DurNs / 1000 ); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); - J.attribute("avg ms", int64_t(DurUs / Count / 1000 / 1000)); + J.attribute("avg us", int64_t(DurNs / Count / 1000)); }); }); diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index d4ee246f84449f1..bed9b1e40db455b 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -83,7 +83,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); - TIMESCOPE_WITH_RTM_AND_IDENT("Runtime Data Copy", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy","NumArgs="+std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -252,9 +252,6 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, KernelArgsTy *KernelArgs) { static_assert(std::is_convertible_v, "Target AsyncInfoTy must be convertible to AsyncInfoTy."); - - TIMESCOPE_WITH_NAME_AND_IDENT("Runtime target exe",Loc); - DP("Entering target region for device %" PRId64 " with entry point " DPxMOD "\n", DeviceId, DPxPTR(HostPtr)); @@ -279,7 +276,11 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, assert(KernelArgs->ThreadLimit[0] == static_cast(ThreadLimit) && !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); - + TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", + "NumTeams="+std::to_string(NumTeams)+ + ";NumArgs="+std::to_string(KernelArgs->NumArgs) + , Loc); + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, KernelArgs->ArgSizes, KernelArgs->ArgTypes, @@ -303,16 +304,17 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - + int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - + { + TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); if (Rc == OFFLOAD_SUCCESS) Rc = AsyncInfo.synchronize(); handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); - + } return OMP_TGT_SUCCESS; } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index e113942375ef9c6..5f6168b0bd2fca0 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -579,7 +579,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - + TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev","Size="+std::to_string(ArgSizes[I])+"B", Loc); if (ArgMappers && ArgMappers[I]) { // 
Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -863,7 +863,6 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { - //TIMESCOPE_WITH_NAME_AND_IDENT("targetDataEnd", Loc); int Ret = OFFLOAD_SUCCESS; auto *PostProcessingPtrs = new SmallVector(); // process each input. @@ -956,8 +955,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - std::string MessageDataSize = "DevToHost "+std::to_string(DataSize)+"B"; - TIMESCOPE_WITH_NAME_AND_IDENT(MessageDataSize, Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+std::to_string(DataSize)+"B", Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1447,7 +1445,6 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, SmallVector &TgtOffsets, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - //TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); @@ -1494,8 +1491,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); continue; - } - TIMESCOPE_WITH_RTM_AND_IDENT("HostToDev", Loc); + } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, @@ -1575,7 +1571,6 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, map_var_info_t *ArgNames, void **ArgMappers, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - //TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; // Move data from device. 
@@ -1676,8 +1671,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_RTM_AND_IDENT("Kernel", Loc); - + TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", + "NumArguments="+std::to_string(KernelArgs.NumArgs) + +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) + +";TripCount="+std::to_string(KernelArgs.Tripcount) + , Loc); + #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && "Multi dimensional launch not supported yet."); diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index f0591cd17b0fd15..4bc1db79de3f2b7 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -432,14 +432,18 @@ class ExponentialBackoff { #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ - std::string ProfileName = SI.getName(); \ + std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(ProfileName, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) +#define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ + SourceInfo SI(IDENT); \ + std::string ProfileLocation = SI.getProfileLocation(); \ + llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + Details) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) #define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT) #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) - +#define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) #endif >From c8bb24e807324a6a42b50076e5a3d2159f1d6d74 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 26 Sep 2023 15:58:50 +0000 Subject: [PATCH 06/11] Some fixes to the profiler --- openmp/libomptarget/src/api.cpp | 7 +++++++ openmp/libomptarget/src/interface.cpp | 16 +++++----------- openmp/libomptarget/src/omptarget.cpp | 5 ----- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 942df8fdb94d660..f628a64c5b69fa4 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,6 +50,7 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { + TIMESCOPE(); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -66,6 +67,7 @@ EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) { } EXTERN void omp_target_free(void *Ptr, int DeviceNum) { + TIMESCOPE(); return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -134,6 +136,11 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { TIMESCOPE(); + /*TIMESCOPE_WITH_DETAILS_AND_IDENT("omp_target_memcpy", + "NumArguments="+std::to_string(KernelArgs.NumArgs) + +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) + +";TripCount="+std::to_string(KernelArgs.Tripcount) + , __FUNCTION__);*/ DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index bed9b1e40db455b..61a340ccf8d1b10 100644 --- 
a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -33,14 +33,12 @@ using namespace llvm::omp::target::ompt; //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { - //TIMESCOPE(); PM->RTLs.registerRequires(Flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { - //TIMESCOPE(); if (PM->maybeDelayRegisterLib(Desc)) return; @@ -61,7 +59,6 @@ EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { - //TIMESCOPE(); PM->RTLs.unregisterLib(Desc); for (auto &RTL : PM->RTLs.UsedRTLs) { if (RTL->unregister_lib) { @@ -82,7 +79,6 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, static_assert(std::is_convertible_v, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); - //TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, Loc); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy","NumArgs="+std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", @@ -307,13 +303,13 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - { + { //required to show syncronization TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); - if (Rc == OFFLOAD_SUCCESS) - Rc = AsyncInfo.synchronize(); + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); - handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); - assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); + assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!"); } return OMP_TGT_SUCCESS; } @@ -411,7 +407,6 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, // Get the current number of components for a user-defined mapper. 
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { - //TIMESCOPE(); auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; int64_t Size = MapperComponentsPtr->Components.size(); DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n", @@ -423,7 +418,6 @@ EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, void *Begin, int64_t Size, int64_t Type, void *Name) { - //TIMESCOPE(); DP("__tgt_push_mapper_component(Handle=" DPxMOD ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s).\n", diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 5f6168b0bd2fca0..450f34894fb56b4 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -398,7 +398,6 @@ static int32_t getParentIndex(int64_t Type) { void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size); if (Size <= 0) { @@ -427,7 +426,6 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum, DPxPTR(DevicePtr)); @@ -453,7 +451,6 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size); if (Size <= 0) { @@ -493,7 +490,6 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, } void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) { - //TIMESCOPE(); DP("Call to %s for device %d unlocking\n", Name, DeviceNum); DeviceTy *DevicePtr = nullptr; @@ -572,7 +568,6 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { - //TIMESCOPE_WITH_IDENT(Loc); // process each input. for (int32_t I = 0; I < ArgNum; ++I) { // Ignore private variables and arrays - there is no mapping for them. 
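All of these TIMESCOPE variants bottom out in llvm::TimeTraceScope, the same
RAII mechanism behind clang's -ftime-trace. A self-contained sketch of that
mechanism outside libomptarget (the function, detail values, and output file
name are made up for illustration; the two LLVM calls are assumed to match
the TimeProfiler.h API of this tree):

  #include "llvm/Support/Error.h"
  #include "llvm/Support/TimeProfiler.h"
  #include <string>

  static void runKernelExample() {
    // RAII: construction and destruction of the scope bound the event; the
    // second argument becomes the "detail" field of the trace entry.
    llvm::TimeTraceScope Scope("Kernel Target",
                               "NumTeams=" + std::to_string(4) +
                                   ";TripCount=" + std::to_string(1024));
  }

  int main() {
    // Granularity 0 keeps every event, however short.
    llvm::timeTraceProfilerInitialize(/*TimeTraceGranularity=*/0, "example");
    runKernelExample();
    // Emits Chrome-trace JSON viewable in chrome://tracing or Perfetto.
    if (llvm::Error E = llvm::timeTraceProfilerWrite("profile.json", "-"))
      llvm::consumeError(std::move(E));
    llvm::timeTraceProfilerCleanup();
  }

In libomptarget itself the initialize/write pair is driven by the
LIBOMPTARGET_PROFILE environment variable rather than called by hand.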
>From da71cf17918c56e6a64c1e966dbb5d0dd79d0ed9 Mon Sep 17 00:00:00 2001 From: Felipe Cabarcas Date: Tue, 26 Sep 2023 21:06:06 +0000 Subject: [PATCH 07/11] Adding information to some omp api calls --- openmp/libomptarget/src/api.cpp | 22 ++++++++++++---------- openmp/libomptarget/src/private.h | 7 +++++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index f628a64c5b69fa4..5dd918808492997 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,7 +50,8 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { - TIMESCOPE(); + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DeviceNum) + +";size="+std::to_string(Size)); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -135,12 +136,9 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { - TIMESCOPE(); - /*TIMESCOPE_WITH_DETAILS_AND_IDENT("omp_target_memcpy", - "NumArguments="+std::to_string(KernelArgs.NumArgs) - +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) - +";TripCount="+std::to_string(KernelArgs.Tripcount) - , __FUNCTION__);*/ + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) + +";src_dev="+std::to_string(SrcDevice) + +";size="+std::to_string(Length)); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -293,7 +291,9 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE(); + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) + +";src_dev="+std::to_string(SrcDevice) + +";size="+std::to_string(Length)); DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -321,7 +321,6 @@ omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice) { - TIMESCOPE(); DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " @@ -380,7 +379,10 @@ EXTERN int omp_target_memcpy_rect_async( const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE(); + TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) + +";src_dev="+std::to_string(SrcDevice) + +";size="+std::to_string(ElementSize) + +";num_dims="+std::to_string(NumDims)); DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index 4bc1db79de3f2b7..c8d07138b180d17 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -434,16 +434,19 @@ class 
ExponentialBackoff { std::string ProfileLocation = SI.getProfileLocation(); \ std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ - llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) + //llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + Details) +#define TIMESCOPE_WITH_DETAILS(Details) \ + llvm::TimeTraceScope TimeScope(__FUNCTION__, Details) #else #define TIMESCOPE() #define TIMESCOPE_WITH_IDENT(IDENT) #define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT) #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) +#define TIMESCOPE_WITH_DETAILS(Details) #endif >From f273bbcc66f361fe9cc03d8597ee886122b5e235 Mon Sep 17 00:00:00 2001 From: fel-cab Date: Mon, 2 Oct 2023 12:26:51 +0000 Subject: [PATCH 08/11] Adding information to the LIBOMPTARGET profiler runtime kernel and API calls. --- openmp/libomptarget/src/interface.cpp | 14 ++++++++------ openmp/libomptarget/src/omptarget.cpp | 24 +++++++++++++++--------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 61a340ccf8d1b10..99a7abc7e0bcee9 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -79,7 +79,9 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, static_assert(std::is_convertible_v, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy","NumArgs="+std::to_string(ArgNum), Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy", + "NumArgs="+ + std::to_string(ArgNum), Loc); DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -273,10 +275,10 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", - "NumTeams="+std::to_string(NumTeams)+ - ";NumArgs="+std::to_string(KernelArgs->NumArgs) - , Loc); - + "NumTeams="+std::to_string(NumTeams)+ + ";NumArgs="+ + std::to_string(KernelArgs->NumArgs), Loc); + if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, KernelArgs->ArgSizes, KernelArgs->ArgTypes, @@ -300,7 +302,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - + int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); { //required to show syncronization diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 450f34894fb56b4..b5a2dfc68569081 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -574,7 +574,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & 
OMP_TGT_MAPTYPE_PRIVATE)) continue; - TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev","Size="+std::to_string(ArgSizes[I])+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev", + "Size="+ + std::to_string(ArgSizes[I])+ + "B", Loc); if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -950,7 +953,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+std::to_string(DataSize)+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+ + std::to_string(DataSize)+"B", Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1486,7 +1490,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); continue; - } + } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, @@ -1590,7 +1594,6 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, return Ret; }); - return OFFLOAD_SUCCESS; } } // namespace @@ -1667,11 +1670,14 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", - "NumArguments="+std::to_string(KernelArgs.NumArgs) - +";NumTeams="+std::to_string(KernelArgs.NumTeams[0]) - +";TripCount="+std::to_string(KernelArgs.Tripcount) - , Loc); - + "NumArguments="+ + std::to_string(KernelArgs.NumArgs)+ + ";NumTeams="+ + std::to_string(KernelArgs.NumTeams[0])+ + ";TripCount="+ + std::to_string(KernelArgs.Tripcount) + , Loc); + #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && "Multi dimensional launch not supported yet."); >From 08dbdd5ba1c0502b6d1c935bac6cc14acd4f04be Mon Sep 17 00:00:00 2001 From: fel-cab Date: Mon, 2 Oct 2023 19:14:01 +0000 Subject: [PATCH 09/11] Fixing format --- llvm/lib/Support/TimeProfiler.cpp | 10 +++---- openmp/libomptarget/src/api.cpp | 24 ++++++++--------- openmp/libomptarget/src/interface.cpp | 14 +++++----- openmp/libomptarget/src/omptarget.cpp | 39 ++++++++++++--------------- openmp/libomptarget/src/private.h | 2 +- 5 files changed, 42 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 4446583102a8133..330a4d93378affe 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -226,7 +226,7 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurNs / 1000 ); + J.attribute("dur", DurNs / 1000); J.attribute("name", "Total: " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); @@ -261,10 +261,10 @@ struct llvm::TimeTraceProfiler { // Emit the absolute time when this TimeProfiler started. // This can be used to combine the profiling data from // multiple processes and preserve actual time intervals. 
- J.attribute("beginningOfTime", - time_point_cast(BeginningOfTime) - .time_since_epoch() - .count()/1000); + J.attribute("beginningOfTime", time_point_cast(BeginningOfTime) + .time_since_epoch() + .count() / + 1000); J.objectEnd(); } diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 5dd918808492997..06de1f8f20b7ae2 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -50,8 +50,8 @@ EXTERN int omp_get_initial_device(void) { } EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DeviceNum) - +";size="+std::to_string(Size)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) + + ";size=" + std::to_string(Size)); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } @@ -136,9 +136,9 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(Length)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(Length)); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -291,9 +291,9 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(Length)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(Length)); DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -379,10 +379,10 @@ EXTERN int omp_target_memcpy_rect_async( const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { - TIMESCOPE_WITH_DETAILS("dst_dev="+std::to_string(DstDevice) - +";src_dev="+std::to_string(SrcDevice) - +";size="+std::to_string(ElementSize) - +";num_dims="+std::to_string(NumDims)); + TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + + ";src_dev=" + std::to_string(SrcDevice) + + ";size=" + std::to_string(ElementSize) + + ";num_dims=" + std::to_string(NumDims)); DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 99a7abc7e0bcee9..2c7ab7a49d0bfb0 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -80,8 +80,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, "TargetAsyncInfoTy must be convertible to AsyncInfoTy."); TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy", - "NumArgs="+ - std::to_string(ArgNum), Loc); + "NumArgs=" + std::to_string(ArgNum), Loc); 
DP("Entering data %s region for device %" PRId64 " with %d mappings\n", RegionName, DeviceId, ArgNum); @@ -274,10 +273,11 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, assert(KernelArgs->ThreadLimit[0] == static_cast(ThreadLimit) && !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && "OpenMP interface should not use multiple dimensions"); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime target exe", - "NumTeams="+std::to_string(NumTeams)+ - ";NumArgs="+ - std::to_string(KernelArgs->NumArgs), Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "Runtime target exe", + "NumTeams=" + std::to_string(NumTeams) + + ";NumArgs=" + std::to_string(KernelArgs->NumArgs), + Loc); if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs, @@ -305,7 +305,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, Device, HostPtr, *KernelArgs, AsyncInfo); - { //required to show syncronization + { // required to show syncronization TIMESCOPE_WITH_RTM_AND_IDENT("syncronize", Loc); if (Rc == OFFLOAD_SUCCESS) Rc = AsyncInfo.synchronize(); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index b5a2dfc68569081..277f95d7efa8201 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -574,10 +574,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - TIMESCOPE_WITH_DETAILS_AND_IDENT("HostToDev", - "Size="+ - std::to_string(ArgSizes[I])+ - "B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "HostToDev", "Size=" + std::to_string(ArgSizes[I]) + "B", Loc); if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again @@ -825,14 +823,13 @@ postProcessingTargetDataEnd(DeviceTy *Device, // remaining shadow pointer entries for this struct. const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM; if (HasFrom) { - Entry->foreachShadowPointerInfo( - [&](const ShadowPtrInfoTy &ShadowPtr) { - *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); - return OFFLOAD_SUCCESS; - }); + Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) { + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); } // Give up the lock as we either don't need it anymore (e.g., done with @@ -953,8 +950,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - TIMESCOPE_WITH_DETAILS_AND_IDENT("DevToHost","Size="+ - std::to_string(DataSize)+"B", Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc); // Wait for any previous transfer if an event is present. 
if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1669,14 +1666,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); - TIMESCOPE_WITH_DETAILS_AND_IDENT("Kernel Target", - "NumArguments="+ - std::to_string(KernelArgs.NumArgs)+ - ";NumTeams="+ - std::to_string(KernelArgs.NumTeams[0])+ - ";TripCount="+ - std::to_string(KernelArgs.Tripcount) - , Loc); + TIMESCOPE_WITH_DETAILS_AND_IDENT( + "Kernel Target", + "NumArguments=" + std::to_string(KernelArgs.NumArgs) + + ";NumTeams=" + std::to_string(KernelArgs.NumTeams[0]) + + ";TripCount=" + std::to_string(KernelArgs.Tripcount), + Loc); #ifdef OMPT_SUPPORT assert(KernelArgs.NumTeams[1] == 0 && KernelArgs.NumTeams[2] == 0 && diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index c8d07138b180d17..8657390dde17dc1 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -435,7 +435,7 @@ class ExponentialBackoff { std::string ProfileName = SI.getName(); \ std::string RTM = RegionTypeMsg; \ llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM) - //llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) +// llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + RTM) #define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \ SourceInfo SI(IDENT); \ std::string ProfileLocation = SI.getProfileLocation(); \ >From 536f58d0b8639fbccb2467634bf52ab4e737c121 Mon Sep 17 00:00:00 2001 From: fel-cab Date: Mon, 2 Oct 2023 20:51:40 +0000 Subject: [PATCH 10/11] Change of lld/test/MachO/map-file.s: adding : to the test check, given that the profile added the colons to make the total more clear in the trace --- lld/test/MachO/map-file.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s index aa9fff9938eb281..279a15b8e33e606 100644 --- a/lld/test/MachO/map-file.s +++ b/lld/test/MachO/map-file.s @@ -89,7 +89,7 @@ # CHECK-NEXT: 0x[[#%X,BSS]] 0x00000001 [ 2] _number # CHECK-EMPTY: -# MAPFILE: "name":"Total Write map file" +# MAPFILE: "name":"Total: Write map file" # RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o -force_load \ # RUN: %t/libfoo.a %t/c-string-literal.o %t/libbaz.dylib -o %t/stripped >From 089c0adb5f120a722c576f3feb8d000e621cfc84 Mon Sep 17 00:00:00 2001 From: fel-cab Date: Tue, 3 Oct 2023 12:10:13 +0000 Subject: [PATCH 11/11] Removing TimeProfiler microsecond to nanosecond change from this PR, it will be a separate PR --- llvm/lib/Support/TimeProfiler.cpp | 48 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 330a4d93378affe..4d625b3eb5b1709 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -30,7 +30,7 @@ namespace { using std::chrono::duration; using std::chrono::duration_cast; -using std::chrono::nanoseconds; +using std::chrono::microseconds; using std::chrono::steady_clock; using std::chrono::system_clock; using std::chrono::time_point; @@ -79,15 +79,15 @@ struct TimeTraceProfilerEntry { // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. This avoids truncation issues causing inner // scopes overruning outer scopes. 
- ClockType::rep getFlameGraphStartNs(TimePointType StartTime) const { - return (time_point_cast(Start) - - time_point_cast(StartTime)) + ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { + return (time_point_cast(Start) - + time_point_cast(StartTime)) .count(); } - ClockType::rep getFlameGraphDurNs() const { - return (time_point_cast(End) - - time_point_cast(Start)) + ClockType::rep getFlameGraphDurUs() const { + return (time_point_cast(End) - + time_point_cast(Start)) .count(); } }; @@ -114,16 +114,16 @@ struct llvm::TimeTraceProfiler { // Check that end times monotonically increase. assert((Entries.empty() || - (E.getFlameGraphStartNs(StartTime) + E.getFlameGraphDurNs() >= - Entries.back().getFlameGraphStartNs(StartTime) + - Entries.back().getFlameGraphDurNs())) && + (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= + Entries.back().getFlameGraphStartUs(StartTime) + + Entries.back().getFlameGraphDurUs())) && "TimeProfiler scope ended earlier than previous scope"); // Calculate duration at full precision for overall counts. DurationType Duration = E.End - E.Start; // Only include sections longer or equal to TimeTraceGranularity msec. - if (duration_cast(Duration).count() >= TimeTraceGranularity) + if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -162,15 +162,15 @@ struct llvm::TimeTraceProfiler { // Emit all events for the main flame graph. auto writeEvent = [&](const auto &E, uint64_t Tid) { - auto StartNs = E.getFlameGraphStartNs(StartTime); - auto DurNs = E.getFlameGraphDurNs(); + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); J.object([&] { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); - J.attribute("ts", StartNs / 1000); - J.attribute("dur", DurNs / 1000); + J.attribute("ts", StartUs); + J.attribute("dur", DurUs); J.attribute("name", E.Name); if (!E.Detail.empty()) { J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); @@ -218,7 +218,7 @@ struct llvm::TimeTraceProfiler { // Report totals on separate threads of tracing file. uint64_t TotalTid = MaxTid + 1; for (const NameAndCountAndDurationType &Total : SortedTotals) { - auto DurNs = duration_cast(Total.second.second).count(); + auto DurUs = duration_cast(Total.second.second).count(); auto Count = AllCountAndTotalPerName[Total.first].first; J.object([&] { @@ -226,11 +226,11 @@ struct llvm::TimeTraceProfiler { J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); - J.attribute("dur", DurNs / 1000); - J.attribute("name", "Total: " + Total.first); + J.attribute("dur", DurUs); + J.attribute("name", "Total " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); - J.attribute("avg us", int64_t(DurNs / Count / 1000)); + J.attribute("avg ms", int64_t(DurUs / Count / 1000)); }); }); @@ -261,10 +261,10 @@ struct llvm::TimeTraceProfiler { // Emit the absolute time when this TimeProfiler started. // This can be used to combine the profiling data from // multiple processes and preserve actual time intervals. 
- J.attribute("beginningOfTime", time_point_cast(BeginningOfTime) - .time_since_epoch() - .count() / - 1000); + J.attribute("beginningOfTime", + time_point_cast(BeginningOfTime) + .time_since_epoch() + .count()); J.objectEnd(); } @@ -281,7 +281,7 @@ struct llvm::TimeTraceProfiler { SmallString<0> ThreadName; const uint64_t Tid; - // Minimum time granularity (in nanoseconds) + // Minimum time granularity (in microseconds) const unsigned TimeTraceGranularity; }; From lldb-commits at lists.llvm.org Tue Oct 3 05:16:55 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Tue, 03 Oct 2023 05:16:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) Message-ID: https://github.com/DavidSpickett created https://github.com/llvm/llvm-project/pull/68094 FEAT_SME_FA64 (smefa64 in Linux cpuinfo) allows the use of the full A64 instruction set while in streaming SVE mode. See https://developer.arm.com/documentation/ddi0616/latest/ for details. This means for example if we want to write to the ffr register during or use floating point registers while in streaming mode, we need this extension. I initially was using QEMU which has it by default, and switched to Arm's FVP which does not. So this change adds a more strict check and converts most of the tests to use that. It would be possible in some cases to avoid the offending instructions but it would be a lot of effort and liable to fail randomly as the C library changes. It is also my assumption that the majority of systems will have smefa64 as QEMU has chosen to have. If I turn out to be wrong, we can make the effort to get the tests working without smefa64. >From 3816b0fbc31825d3878b031a49fb78dd7c256278 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 3 Oct 2023 11:44:17 +0100 Subject: [PATCH] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing FEAT_SME_FA64 (smefa64 in Linux cpuinfo) allows the use of the full A64 instruction set while in streaming SVE mode. See https://developer.arm.com/documentation/ddi0616/latest/ for details. This means for example if we want to write to the ffr register during or use floating point registers while in streaming mode, we need this extension. I initially was using QEMU which has it by default, and switched to Arm's FVP which does not. So this change adds a more strict check and converts most of the tests to use that. It would be possible in some cases to avoid the offending instructions but it would be a lot of effort and liable to fail randomly as the C library changes. It is also my assumption that the majority of systems will have smefa64 as QEMU has chosen to have. If I turn out to be wrong, we can make the effort to get the tests working without smefa64. 
--- lldb/packages/Python/lldbsuite/test/lldbtest.py | 6 ++++++ .../aarch64_dynamic_regset/TestArm64DynamicRegsets.py | 4 ++-- .../rw_access_dynamic_resize/TestSVEThreadedDynamic.py | 10 ++++++---- .../rw_access_static_config/TestSVERegisters.py | 5 +++-- .../aarch64_sve_simd_registers/TestSVESIMDRegisters.py | 5 +++-- .../za_dynamic_resize/TestZAThreadedDynamic.py | 6 ++++-- .../aarch64_za_register/za_dynamic_resize/main.c | 1 + .../za_save_restore/TestZARegisterSaveRestore.py | 4 ++-- 8 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index c8670b208ec3f0c..2f4130d3ce68ae0 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1271,6 +1271,12 @@ def isAArch64SVE(self): def isAArch64SME(self): return self.isAArch64() and "sme" in self.getCPUInfo() + def isAArch64SMEFA64(self): + # smefa64 allows the use of the full A64 instruction set in streaming + # mode. This is required by certain test programs to setup register + # state. + return self.isAArch64SME() and "smefa64" in self.getCPUInfo() + def isAArch64MTE(self): return self.isAArch64() and "mte" in self.getCPUInfo() diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py index 2fb8b33126417c2..0ad69c268a9fd29 100644 --- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py +++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py @@ -142,8 +142,8 @@ def make_za_value(self, vl, generator): def test_aarch64_dynamic_regset_config_sme(self): """Test AArch64 Dynamic Register sets configuration, but only SME registers.""" - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present") register_sets = self.setup_register_config_test("sme") diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py index 8bcb76776459d01..b19039f0b5212b4 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -108,8 +108,9 @@ def run_sve_test(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be supported and the " + "smefa64 extension must be present.") self.build_for_mode(mode) @@ -201,8 +202,9 @@ def test_ssve_registers_dynamic_config(self): def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests. 
- if not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be present and the " + "smefa64 extension must be present.") self.build_for_mode(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index 82b79b8d4b6cc2b..ac99652442b5ddd 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -85,8 +85,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py index 814ca98369fca57..def93c78abc2745 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -41,8 +41,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py index 65d1071c26b2a34..884340b395a448d 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py @@ -65,8 +65,10 @@ def check_disabled_za_register(self, svg): self.expect("register read za", substrs=[self.gen_za_value(svg, lambda r: 0)]) def za_test_impl(self, enable_za): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + # Although the test program doesn't obviously do any operations that + # would need smefa64, calls to libc functions like memset may do. 
+ if not self.isAArch64SMEFA64(): + self.skipTest("SME and the sm3fa64 extension must be present") self.build() supported_vg = self.get_supported_vg() diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c index fd2590dbe411f7f..05839c26336cc8e 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c @@ -29,6 +29,7 @@ void set_za_register(int svl, int value_offset) { // you have. So setting one that didn't exist would actually set one that did. // That's why we need the streaming vector length here. for (int i = 0; i < svl; ++i) { + // This may involve instructions that require the smefa64 extension. memset(data, i + value_offset, MAX_VL_BYTES); // Each one of these loads a VL sized row of ZA. asm volatile("mov w12, %w0\n\t" diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py index 910966a0b3b0bc5..a647c91f71119ec 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py @@ -106,8 +106,8 @@ def check_za_disabled(self, vl): self.expect("register read za", substrs=[self.make_za_value(vl, lambda row: 0)]) def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present.") supported_svg = self.get_supported_svg() if len(supported_svg) < 2: From lldb-commits at lists.llvm.org Tue Oct 3 05:18:03 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 05:18:03 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) In-Reply-To: Message-ID: <651c067b.a70a0220.b6f84.2732@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes FEAT_SME_FA64 (smefa64 in Linux cpuinfo) allows the use of the full A64 instruction set while in streaming SVE mode. See https://developer.arm.com/documentation/ddi0616/latest/ for details. This means for example if we want to write to the ffr register during or use floating point registers while in streaming mode, we need this extension. I initially was using QEMU which has it by default, and switched to Arm's FVP which does not. So this change adds a more strict check and converts most of the tests to use that. It would be possible in some cases to avoid the offending instructions but it would be a lot of effort and liable to fail randomly as the C library changes. It is also my assumption that the majority of systems will have smefa64 as QEMU has chosen to have. If I turn out to be wrong, we can make the effort to get the tests working without smefa64. --- Full diff: https://github.com/llvm/llvm-project/pull/68094.diff 8 Files Affected: - (modified) lldb/packages/Python/lldbsuite/test/lldbtest.py (+6) - (modified) lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py (+2-2) - (modified) lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py (+6-4) - (modified) lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py (+3-2) - (modified) lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py (+3-2) - (modified) lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py (+4-2) - (modified) lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c (+1) - (modified) lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py (+2-2) ``````````diff diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index c8670b208ec3f0c..2f4130d3ce68ae0 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1271,6 +1271,12 @@ def isAArch64SVE(self): def isAArch64SME(self): return self.isAArch64() and "sme" in self.getCPUInfo() + def isAArch64SMEFA64(self): + # smefa64 allows the use of the full A64 instruction set in streaming + # mode. This is required by certain test programs to setup register + # state. 
+ return self.isAArch64SME() and "smefa64" in self.getCPUInfo() + def isAArch64MTE(self): return self.isAArch64() and "mte" in self.getCPUInfo() diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py index 2fb8b33126417c2..0ad69c268a9fd29 100644 --- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py +++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py @@ -142,8 +142,8 @@ def make_za_value(self, vl, generator): def test_aarch64_dynamic_regset_config_sme(self): """Test AArch64 Dynamic Register sets configuration, but only SME registers.""" - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present") register_sets = self.setup_register_config_test("sme") diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py index 8bcb76776459d01..b19039f0b5212b4 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -108,8 +108,9 @@ def run_sve_test(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be supported and the " + "smefa64 extension must be present.") self.build_for_mode(mode) @@ -201,8 +202,9 @@ def test_ssve_registers_dynamic_config(self): def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests. 
- if not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be present and the " + "smefa64 extension must be present.") self.build_for_mode(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index 82b79b8d4b6cc2b..ac99652442b5ddd 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -85,8 +85,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py index 814ca98369fca57..def93c78abc2745 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -41,8 +41,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py index 65d1071c26b2a34..884340b395a448d 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py @@ -65,8 +65,10 @@ def check_disabled_za_register(self, svg): self.expect("register read za", substrs=[self.gen_za_value(svg, lambda r: 0)]) def za_test_impl(self, enable_za): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + # Although the test program doesn't obviously do any operations that + # would need smefa64, calls to libc functions like memset may do. 
+ if not self.isAArch64SMEFA64(): + self.skipTest("SME and the sm3fa64 extension must be present") self.build() supported_vg = self.get_supported_vg() diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c index fd2590dbe411f7f..05839c26336cc8e 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c @@ -29,6 +29,7 @@ void set_za_register(int svl, int value_offset) { // you have. So setting one that didn't exist would actually set one that did. // That's why we need the streaming vector length here. for (int i = 0; i < svl; ++i) { + // This may involve instructions that require the smefa64 extension. memset(data, i + value_offset, MAX_VL_BYTES); // Each one of these loads a VL sized row of ZA. asm volatile("mov w12, %w0\n\t" diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py index 910966a0b3b0bc5..a647c91f71119ec 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py @@ -106,8 +106,8 @@ def check_za_disabled(self, vl): self.expect("register read za", substrs=[self.make_za_value(vl, lambda row: 0)]) def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present.") supported_svg = self.get_supported_svg() if len(supported_svg) < 2: ``````````
https://github.com/llvm/llvm-project/pull/68094 From lldb-commits at lists.llvm.org Tue Oct 3 05:18:31 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Tue, 03 Oct 2023 05:18:31 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Invalidate SVG prior to reconfiguring ZA regdef (PR #66768) In-Reply-To: Message-ID: <651c0697.170a0220.1e388.28f1@mx.google.com> DavidSpickett wrote: ping! https://github.com/llvm/llvm-project/pull/66768 From lldb-commits at lists.llvm.org Tue Oct 3 05:18:36 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Tue, 03 Oct 2023 05:18:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651c069c.170a0220.d2882.28f3@mx.google.com> DavidSpickett wrote: ping! https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Tue Oct 3 05:25:00 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Tue, 03 Oct 2023 05:25:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) In-Reply-To: Message-ID: <651c081c.170a0220.fb2bc.23fa@mx.google.com> https://github.com/DavidSpickett updated https://github.com/llvm/llvm-project/pull/68094 >From 3816b0fbc31825d3878b031a49fb78dd7c256278 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 3 Oct 2023 11:44:17 +0100 Subject: [PATCH 1/2] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing FEAT_SME_FA64 (smefa64 in Linux cpuinfo) allows the use of the full A64 instruction set while in streaming SVE mode. See https://developer.arm.com/documentation/ddi0616/latest/ for details. This means for example if we want to write to the ffr register during or use floating point registers while in streaming mode, we need this extension. I initially was using QEMU which has it by default, and switched to Arm's FVP which does not. So this change adds a more strict check and converts most of the tests to use that. It would be possible in some cases to avoid the offending instructions but it would be a lot of effort and liable to fail randomly as the C library changes. It is also my assumption that the majority of systems will have smefa64 as QEMU has chosen to have. If I turn out to be wrong, we can make the effort to get the tests working without smefa64. --- lldb/packages/Python/lldbsuite/test/lldbtest.py | 6 ++++++ .../aarch64_dynamic_regset/TestArm64DynamicRegsets.py | 4 ++-- .../rw_access_dynamic_resize/TestSVEThreadedDynamic.py | 10 ++++++---- .../rw_access_static_config/TestSVERegisters.py | 5 +++-- .../aarch64_sve_simd_registers/TestSVESIMDRegisters.py | 5 +++-- .../za_dynamic_resize/TestZAThreadedDynamic.py | 6 ++++-- .../aarch64_za_register/za_dynamic_resize/main.c | 1 + .../za_save_restore/TestZARegisterSaveRestore.py | 4 ++-- 8 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index c8670b208ec3f0c..2f4130d3ce68ae0 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1271,6 +1271,12 @@ def isAArch64SVE(self): def isAArch64SME(self): return self.isAArch64() and "sme" in self.getCPUInfo() + def isAArch64SMEFA64(self): + # smefa64 allows the use of the full A64 instruction set in streaming + # mode. This is required by certain test programs to setup register + # state. 
+ return self.isAArch64SME() and "smefa64" in self.getCPUInfo() + def isAArch64MTE(self): return self.isAArch64() and "mte" in self.getCPUInfo() diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py index 2fb8b33126417c2..0ad69c268a9fd29 100644 --- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py +++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py @@ -142,8 +142,8 @@ def make_za_value(self, vl, generator): def test_aarch64_dynamic_regset_config_sme(self): """Test AArch64 Dynamic Register sets configuration, but only SME registers.""" - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present") register_sets = self.setup_register_config_test("sme") diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py index 8bcb76776459d01..b19039f0b5212b4 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -108,8 +108,9 @@ def run_sve_test(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be supported and the " + "smefa64 extension must be present.") self.build_for_mode(mode) @@ -201,8 +202,9 @@ def test_ssve_registers_dynamic_config(self): def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests. 
- if not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be present and the " + "smefa64 extension must be present.") self.build_for_mode(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index 82b79b8d4b6cc2b..ac99652442b5ddd 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -85,8 +85,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py index 814ca98369fca57..def93c78abc2745 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -41,8 +41,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py index 65d1071c26b2a34..884340b395a448d 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py @@ -65,8 +65,10 @@ def check_disabled_za_register(self, svg): self.expect("register read za", substrs=[self.gen_za_value(svg, lambda r: 0)]) def za_test_impl(self, enable_za): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + # Although the test program doesn't obviously do any operations that + # would need smefa64, calls to libc functions like memset may do. 
+ if not self.isAArch64SMEFA64(): + self.skipTest("SME and the sm3fa64 extension must be present") self.build() supported_vg = self.get_supported_vg() diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c index fd2590dbe411f7f..05839c26336cc8e 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c @@ -29,6 +29,7 @@ void set_za_register(int svl, int value_offset) { // you have. So setting one that didn't exist would actually set one that did. // That's why we need the streaming vector length here. for (int i = 0; i < svl; ++i) { + // This may involve instructions that require the smefa64 extension. memset(data, i + value_offset, MAX_VL_BYTES); // Each one of these loads a VL sized row of ZA. asm volatile("mov w12, %w0\n\t" diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py index 910966a0b3b0bc5..a647c91f71119ec 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py @@ -106,8 +106,8 @@ def check_za_disabled(self, vl): self.expect("register read za", substrs=[self.make_za_value(vl, lambda row: 0)]) def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present.") supported_svg = self.get_supported_svg() if len(supported_svg) < 2: >From 4fb5205f9ea5298c7826be8629acaea31fa32837 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 3 Oct 2023 13:24:39 +0100 Subject: [PATCH 2/2] Fix python formatting. --- .../TestSVEThreadedDynamic.py | 12 ++++++++---- .../rw_access_static_config/TestSVERegisters.py | 6 ++++-- .../TestSVESIMDRegisters.py | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py index b19039f0b5212b4..5d5914bef354655 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -109,8 +109,10 @@ def run_sve_test(self, mode): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("Streaming SVE registers must be supported and the " - "smefa64 extension must be present.") + self.skipTest( + "Streaming SVE registers must be supported and the " + "smefa64 extension must be present." + ) self.build_for_mode(mode) @@ -203,8 +205,10 @@ def test_ssve_registers_dynamic_config(self): def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests. 
if not self.isAArch64SMEFA64(): - self.skipTest("Streaming SVE registers must be present and the " - "smefa64 extension must be present.") + self.skipTest( + "Streaming SVE registers must be present and the " + "smefa64 extension must be present." + ) self.build_for_mode(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index ac99652442b5ddd..f198d4716e8ee18 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -86,8 +86,10 @@ def skip_if_needed(self, mode): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("SSVE registers must be supported and the smefa64 " - "extension must be present.") + self.skipTest( + "SSVE registers must be supported and the smefa64 " + "extension must be present." + ) def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py index def93c78abc2745..ce4c725714d23cc 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -42,8 +42,10 @@ def skip_if_needed(self, mode): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("SSVE registers must be supported and the smefa64 " - "extension must be present.") + self.skipTest( + "SSVE registers must be supported and the smefa64 " + "extension must be present." + ) def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) From lldb-commits at lists.llvm.org Tue Oct 3 05:26:06 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 05:26:06 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMP] Improve omp offload profiler (PR #68016) In-Reply-To: Message-ID: <651c085e.630a0220.19996.2b2a@mx.google.com> ================ @@ -79,15 +79,15 @@ struct TimeTraceProfilerEntry { // Calculate timings for FlameGraph. Cast time points to microsecond precision ---------------- fel-cab wrote: Removed this file from this PR https://github.com/llvm/llvm-project/pull/68016 From lldb-commits at lists.llvm.org Tue Oct 3 05:28:09 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 05:28:09 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) In-Reply-To: Message-ID: <651c08d9.170a0220.f052f.2a6d@mx.google.com> github-actions[bot] wrote: :warning: Python code formatter, darker found issues in your code. :warning:
You can test this locally with the following command: ``````````bash darker --check --diff -r 9a408588d1b8b7899eff593c537de539a4a12651..3816b0fbc31825d3878b031a49fb78dd7c256278 lldb/packages/Python/lldbsuite/test/lldbtest.py lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py ``````````
View the diff from darker here. ``````````diff --- test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py 2023-10-03 12:14:52.000000 +0000 +++ test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py 2023-10-03 12:28:00.613685 +0000 @@ -107,12 +107,14 @@ def run_sve_test(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("Streaming SVE registers must be supported and the " - "smefa64 extension must be present.") + self.skipTest( + "Streaming SVE registers must be supported and the " + "smefa64 extension must be present." + ) self.build_for_mode(mode) supported_vg = self.get_supported_vg() @@ -201,12 +203,14 @@ self.run_sve_test(Mode.SSVE) def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests. if not self.isAArch64SMEFA64(): - self.skipTest("Streaming SVE registers must be present and the " - "smefa64 extension must be present.") + self.skipTest( + "Streaming SVE registers must be present and the " + "smefa64 extension must be present." + ) self.build_for_mode(mode) supported_vg = self.get_supported_vg() --- test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py 2023-10-03 12:14:52.000000 +0000 +++ test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py 2023-10-03 12:28:00.718448 +0000 @@ -84,12 +84,14 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("SSVE registers must be supported and the smefa64 " - "extension must be present.") + self.skipTest( + "SSVE registers must be supported and the smefa64 " + "extension must be present." + ) def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) self.build(dictionary=self.get_build_flags(mode)) --- test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py 2023-10-03 12:14:52.000000 +0000 +++ test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py 2023-10-03 12:28:00.774799 +0000 @@ -40,12 +40,14 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("SSVE registers must be supported and the smefa64 " - "extension must be present.") + self.skipTest( + "SSVE registers must be supported and the smefa64 " + "extension must be present." + ) def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) return "{{0x{:02x} {} 0x{:02x} {}}}".format(n, pad, n, pad) ``````````
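The reflow darker suggests above leans on Python's implicit concatenation of adjacent string literals: once the skipTest call is parenthesized, the long message can span several lines with no trailing backslash or `+`. A small illustration of the same pattern, separate from the patch (skip_test is a stand-in for lldbtest's self.skipTest, not the real API):

``````````python
# Adjacent string literals are merged at compile time, so each call below
# receives a single one-line message; darker/black prefers the second,
# parenthesized form for long arguments.
def skip_test(reason):  # hypothetical stand-in for self.skipTest
    print("SKIP:", reason)

skip_test("SSVE registers must be supported and the smefa64 "
          "extension must be present.")

skip_test(
    "SSVE registers must be supported and the smefa64 "
    "extension must be present."
)
``````````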
https://github.com/llvm/llvm-project/pull/68094 From lldb-commits at lists.llvm.org Tue Oct 3 05:51:34 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 05:51:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651c0e56.170a0220.2b90c.2b70@mx.google.com> https://github.com/aokblast updated https://github.com/llvm/llvm-project/pull/67106 >From f4d7761c4abd2a16739a917ded31f90fdbbbf2fb Mon Sep 17 00:00:00 2001 From: SHENG-YI HONG Date: Tue, 3 Oct 2023 20:48:52 +0800 Subject: [PATCH] Add DynamicLoader Plugin For FreeBSD Kernel coredump This patch adds a dynamic loader plugin for FreeBSD kernel coredumps to lldb. The implementation parses the linker_files structure to get all loaded kernel modules. This patch was part of FreeBSD's participation in Google Summer of Code 2023. --- .../Plugins/DynamicLoader/CMakeLists.txt | 1 + .../FreeBSD-Kernel/CMakeLists.txt | 13 + .../DynamicLoaderFreeBSDKernel.cpp | 789 ++++++++++++++++++ .../DynamicLoaderFreeBSDKernel.h | 171 ++++ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 43 +- .../FreeBSDKernel/ProcessFreeBSDKernel.cpp | 4 +- 6 files changed, 1014 insertions(+), 7 deletions(-) create mode 100644 lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt create mode 100644 lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp create mode 100644 lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h diff --git a/lldb/source/Plugins/DynamicLoader/CMakeLists.txt b/lldb/source/Plugins/DynamicLoader/CMakeLists.txt index f357fea02efbe68..30607159acdc088 100644 --- a/lldb/source/Plugins/DynamicLoader/CMakeLists.txt +++ b/lldb/source/Plugins/DynamicLoader/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Darwin-Kernel) +add_subdirectory(FreeBSD-Kernel) add_subdirectory(MacOSX-DYLD) add_subdirectory(POSIX-DYLD) add_subdirectory(Static) diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt new file mode 100644 index 000000000000000..76daf0a327cf97b --- /dev/null +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt @@ -0,0 +1,13 @@ +add_lldb_library(lldbPluginDynamicLoaderFreeBSDKernel PLUGIN + DynamicLoaderFreeBSDKernel.cpp + + LINK_LIBS + lldbBreakpoint + lldbCore + lldbHost + lldbInterpreter + lldbSymbol + lldbTarget + lldbUtility + lldbPluginObjectFileELF + ) diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp new file mode 100644 index 000000000000000..bbb83ff0a118400 --- /dev/null +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp @@ -0,0 +1,789 @@ +//===-- DynamicLoaderFreeBSDKernel.cpp +//------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Breakpoint/StoppointCallbackContext.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Host/StreamFile.h" +#include "lldb/Interpreter/OptionValueProperties.h" +#include "lldb/Symbol/LocateSymbolFile.h" +#include "lldb/Symbol/ObjectFile.h" +#include "lldb/Target/OperatingSystem.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/StackFrame.h" +#include "lldb/Target/Target.h" +#include "lldb/Target/Thread.h" +#include "lldb/Target/ThreadPlanRunToAddress.h" +#include "lldb/Utility/DataBuffer.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/State.h" + +#include "Plugins/ObjectFile/ELF/ObjectFileELF.h" + +#include "DynamicLoaderFreeBSDKernel.h" +#include +#include + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(DynamicLoaderFreeBSDKernel) + +void DynamicLoaderFreeBSDKernel::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + DebuggerInit); +} + +void DynamicLoaderFreeBSDKernel::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +llvm::StringRef DynamicLoaderFreeBSDKernel::GetPluginDescriptionStatic() { + return "The Dynamic Loader Plugin For FreeBSD Kernel"; +} + +static bool is_kernel(Module *module) { + if (!module) + return false; + + ObjectFile *objfile = module->GetObjectFile(); + if (!objfile) + return false; + if (objfile->GetType() != ObjectFile::eTypeExecutable) + return false; + if (objfile->GetStrata() != ObjectFile::eStrataUnknown && + objfile->GetStrata() != ObjectFile::eStrataKernel) + return false; + + return true; +} + +static bool is_kmod(Module *module) { + if (!module) + return false; + if (!module->GetObjectFile()) + return false; + ObjectFile *objfile = module->GetObjectFile(); + if (objfile->GetType() != ObjectFile::eTypeObjectFile && + objfile->GetType() != ObjectFile::eTypeSharedLibrary) + return false; + + return true; +} + +static bool is_reloc(Module *module) { + if (!module) + return false; + if (!module->GetObjectFile()) + return false; + ObjectFile *objfile = module->GetObjectFile(); + if (objfile->GetType() != ObjectFile::eTypeObjectFile) + return false; + + return true; +} + +// Instantiate Function of the FreeBSD Kernel Dynamic Loader Plugin called when +// Register the Plugin +DynamicLoader * +DynamicLoaderFreeBSDKernel::CreateInstance(lldb_private::Process *process, + bool force) { + // Check the environment when the plugin is not force loaded + Module *exec = process->GetTarget().GetExecutableModulePointer(); + if (exec && !is_kernel(exec)) { + return nullptr; + } + if (!force) { + // Check if the target is kernel + const llvm::Triple &triple_ref = + process->GetTarget().GetArchitecture().GetTriple(); + if (!triple_ref.isOSFreeBSD()) { + return nullptr; + } + } + + // At this point we have checked the target is a FreeBSD kernel and all we + // have to do is to find the kernel address + const addr_t kernel_address = FindFreeBSDKernel(process); + + if (CheckForKernelImageAtAddress(process, kernel_address).IsValid()) + return new DynamicLoaderFreeBSDKernel(process, kernel_address); + + return nullptr; +} + +addr_t 
+DynamicLoaderFreeBSDKernel::FindFreeBSDKernel(lldb_private::Process *process) { + addr_t kernel_addr = process->GetImageInfoAddress(); + if (kernel_addr == LLDB_INVALID_ADDRESS) + kernel_addr = FindKernelAtLoadAddress(process); + return kernel_addr; +} + +// Get the kernel address if the kernel is not loaded with a slide +addr_t DynamicLoaderFreeBSDKernel::FindKernelAtLoadAddress( + lldb_private::Process *process) { + Module *exe_module = process->GetTarget().GetExecutableModulePointer(); + + if (!is_kernel(exe_module)) + return LLDB_INVALID_ADDRESS; + + ObjectFile *exe_objfile = exe_module->GetObjectFile(); + + if (!exe_objfile->GetBaseAddress().IsValid()) + return LLDB_INVALID_ADDRESS; + + if (CheckForKernelImageAtAddress( + process, exe_objfile->GetBaseAddress().GetFileAddress()) + .IsValid()) + return exe_objfile->GetBaseAddress().GetFileAddress(); + + return LLDB_INVALID_ADDRESS; +} + +// Read the ELF header from memory and return whether it is valid +bool DynamicLoaderFreeBSDKernel::ReadELFHeader(Process *process, + lldb::addr_t addr, + llvm::ELF::Elf32_Ehdr &header, + bool *read_error) { + Status error; + if (read_error) + *read_error = false; + + if (process->ReadMemory(addr, &header, sizeof(header), error) != + sizeof(header)) { + if (read_error) + *read_error = true; + return false; + } + + if (!header.checkMagic()) + return false; + + return true; +} + +// Check the validity of the kernel image and return its UUID +lldb_private::UUID DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress( + Process *process, lldb::addr_t addr, bool *read_error) { + Log *log = GetLog(LLDBLog::DynamicLoader); + + if (addr == LLDB_INVALID_ADDRESS) { + if (read_error) + *read_error = true; + return UUID(); + } + + LLDB_LOGF(log, + "DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress: " + "looking for kernel binary at 0x%" PRIx64, + addr); + + llvm::ELF::Elf32_Ehdr header; + if (!ReadELFHeader(process, addr, header)) { + *read_error = true; + return UUID(); + } + + // Check header type + if (header.e_type != llvm::ELF::ET_EXEC) + return UUID(); + + ModuleSP memory_module_sp = + process->ReadModuleFromMemory(FileSpec("temp_freebsd_kernel"), addr); + + if (!memory_module_sp.get()) { + *read_error = true; + return UUID(); + } + + ObjectFile *exe_objfile = memory_module_sp->GetObjectFile(); + if (exe_objfile == nullptr) { + LLDB_LOGF(log, + "DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress " + "found a binary at 0x%" PRIx64 + " but could not create an object file from memory", + addr); + return UUID(); + } + + // Ideally we would check is_kernel on memory_module_sp here. However, + // ReadModuleFromMemory reads the wrong section, so that check would fail. + ArchSpec kernel_arch(llvm::ELF::convertEMachineToArchName(header.e_machine)); + + if (!process->GetTarget().GetArchitecture().IsCompatibleMatch(kernel_arch)) + process->GetTarget().SetArchitecture(kernel_arch); + + std::string uuid_str; + if (memory_module_sp->GetUUID().IsValid()) { + uuid_str = "with UUID "; + uuid_str += memory_module_sp->GetUUID().GetAsString(); + } else { + uuid_str = "and no LC_UUID found in load commands "; + } + LLDB_LOGF(log, + "DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress: " + "kernel binary image found at 0x%" PRIx64 " with arch '%s' %s", + addr, kernel_arch.GetTriple().str().c_str(), uuid_str.c_str()); + + return memory_module_sp->GetUUID(); +} + +void DynamicLoaderFreeBSDKernel::DebuggerInit( + lldb_private::Debugger &debugger) {} + +DynamicLoaderFreeBSDKernel::DynamicLoaderFreeBSDKernel(Process *process, + addr_t
kernel_address) + : DynamicLoader(process), m_process(process), + m_linker_file_list_struct_addr(LLDB_INVALID_ADDRESS), + m_linker_file_head_addr(LLDB_INVALID_ADDRESS), + m_kernel_load_address(kernel_address), m_mutex() { + process->SetCanRunCode(false); +} + +DynamicLoaderFreeBSDKernel::~DynamicLoaderFreeBSDKernel() { Clear(true); } + +void DynamicLoaderFreeBSDKernel::Update() { + LoadKernelModules(); + SetNotificationBreakPoint(); +} + +// Create in memory Module at the load address +bool DynamicLoaderFreeBSDKernel::KModImageInfo::ReadMemoryModule( + lldb_private::Process *process) { + Log *log = GetLog(LLDBLog::DynamicLoader); + if (m_memory_module_sp) + return true; + if (m_load_address == LLDB_INVALID_ADDRESS) + return false; + + FileSpec file_spec(m_name); + + ModuleSP memory_module_sp; + + llvm::ELF::Elf32_Ehdr elf_eheader; + size_t size_to_read = 512; + + if (ReadELFHeader(process, m_load_address, elf_eheader)) { + if (elf_eheader.e_ident[llvm::ELF::EI_CLASS] == llvm::ELF::ELFCLASS32) { + size_to_read = sizeof(llvm::ELF::Elf32_Ehdr) + + elf_eheader.e_phnum * elf_eheader.e_phentsize; + } else if (elf_eheader.e_ident[llvm::ELF::EI_CLASS] == + llvm::ELF::ELFCLASS64) { + llvm::ELF::Elf64_Ehdr elf_eheader; + Status error; + if (process->ReadMemory(m_load_address, &elf_eheader, sizeof(elf_eheader), + error) == sizeof(elf_eheader)) + size_to_read = sizeof(llvm::ELF::Elf64_Ehdr) + + elf_eheader.e_phnum * elf_eheader.e_phentsize; + } + } + + memory_module_sp = + process->ReadModuleFromMemory(file_spec, m_load_address, size_to_read); + + if (!memory_module_sp) + return false; + + bool this_is_kernel = is_kernel(memory_module_sp.get()); + + if (!m_uuid.IsValid() && memory_module_sp->GetUUID().IsValid()) + m_uuid = memory_module_sp->GetUUID(); + + m_memory_module_sp = memory_module_sp; + m_is_kernel = this_is_kernel; + + // The kernel binary is from memory + if (this_is_kernel) { + LLDB_LOGF(log, "KextImageInfo::ReadMemoryModule read the kernel binary out " + "of memory"); + + if (memory_module_sp->GetArchitecture().IsValid()) + process->GetTarget().SetArchitecture(memory_module_sp->GetArchitecture()); + } + + return true; +} + +bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule( + lldb_private::Process *process) { + Log *log = GetLog(LLDBLog::DynamicLoader); + + if (IsLoaded()) + return true; + + Target &target = process->GetTarget(); + + if (IsKernel() && m_uuid.IsValid()) { + Stream &s = target.GetDebugger().GetOutputStream(); + s.Printf("Kernel UUID: %s\n", m_uuid.GetAsString().c_str()); + s.Printf("Load Address: 0x%" PRIx64 "\n", m_load_address); + } + + // Test if the module is loaded into the taget, + // maybe the module is loaded manually by user by doing target module add + // So that we have to create the module manually + if (!m_module_sp) { + const ModuleList &target_images = target.GetImages(); + m_module_sp = target_images.FindModule(m_uuid); + + // Search in the file system + if (!m_module_sp) { + ModuleSpec module_spec(FileSpec(GetPath()), target.GetArchitecture()); + if (IsKernel()) { + Status error; + if (Symbols::DownloadObjectAndSymbolFile(module_spec, error, true)) { + if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) + m_module_sp = std::make_shared(module_spec.GetFileSpec(), + target.GetArchitecture()); + } + } + + if (!m_module_sp) + m_module_sp = target.GetOrCreateModule(module_spec, true); + if (IsKernel() && !m_module_sp) { + Stream &s = target.GetDebugger().GetOutputStream(); + s.Printf("WARNING: Unable to locate kernel binary on 
the debugger " + "system.\n"); + } + } + + if (m_module_sp) { + // If the file is not kernel or kmod, the target should be loaded once and + // don't reload again + if (!IsKernel() && !is_kmod(m_module_sp.get())) { + ModuleSP existing_module_sp = target.GetImages().FindModule(m_uuid); + if (existing_module_sp && + existing_module_sp->IsLoadedInTarget(&target)) { + LLDB_LOGF(log, + "'%s' with UUID %s is not a kmod or kernel, and is " + "already registered in target, not loading.", + m_name.c_str(), m_uuid.GetAsString().c_str()); + return true; + } + } + m_uuid = m_module_sp->GetUUID(); + + // or append to the images + target.GetImages().AppendIfNeeded(m_module_sp, false); + } + } + + // If this file is relocatable kernel module(x86_64), adjust it's + // section(PT_LOAD segment) and return Because the kernel module's load + // address is the text section. lldb cannot create full memory module upon + // relocatable file So what we do is to set the load address only. + if (is_kmod(m_module_sp.get()) && is_reloc(m_module_sp.get())) { + m_stop_id = process->GetStopID(); + bool changed = false; + m_module_sp->SetLoadAddress(target, m_load_address, true, changed); + return true; + } + + if (m_module_sp) + ReadMemoryModule(process); + + // Calculate the slides of in memory module + if (!m_memory_module_sp || !m_module_sp) { + m_module_sp.reset(); + return false; + } + + ObjectFile *ondisk_object_file = m_module_sp->GetObjectFile(); + ObjectFile *memory_object_file = m_memory_module_sp->GetObjectFile(); + + if (!ondisk_object_file || !memory_object_file) + m_module_sp.reset(); + + // Find the slide address + addr_t fixed_slide = LLDB_INVALID_ADDRESS; + if (ObjectFileELF *memory_objfile_elf = + llvm::dyn_cast(memory_object_file)) { + addr_t load_address = memory_object_file->GetBaseAddress().GetFileAddress(); + + if (load_address != LLDB_INVALID_ADDRESS && + m_load_address != load_address) { + fixed_slide = m_load_address - load_address; + LLDB_LOGF(log, + "kmod %s in-memory LOAD vmaddr is not correct, using a " + "fixed slide of 0x%" PRIx64, + m_name.c_str(), fixed_slide); + } + } + + SectionList *ondisk_section_list = ondisk_object_file->GetSectionList(); + SectionList *memory_section_list = memory_object_file->GetSectionList(); + + if (memory_section_list && ondisk_object_file) { + const uint32_t num_ondisk_sections = ondisk_section_list->GetSize(); + uint32_t num_load_sections = 0; + + for (uint32_t section_idx = 0; section_idx < num_ondisk_sections; + ++section_idx) { + SectionSP on_disk_section_sp = + ondisk_section_list->GetSectionAtIndex(section_idx); + + if (!on_disk_section_sp) + continue; + if (fixed_slide != LLDB_INVALID_ADDRESS) { + target.SetSectionLoadAddress(on_disk_section_sp, + on_disk_section_sp->GetFileAddress() + + fixed_slide); + + } else { + const Section *memory_section = + memory_section_list + ->FindSectionByName(on_disk_section_sp->GetName()) + .get(); + if (memory_section) { + target.SetSectionLoadAddress(on_disk_section_sp, + memory_section->GetFileAddress()); + ++num_load_sections; + } + } + } + + if (num_load_sections) + m_stop_id = process->GetStopID(); + else + m_module_sp.reset(); + } else { + m_module_sp.reset(); + } + + if (IsLoaded() && m_module_sp && IsKernel()) { + Stream &s = target.GetDebugger().GetOutputStream(); + ObjectFile *kernel_object_file = m_module_sp->GetObjectFile(); + if (kernel_object_file) { + addr_t file_address = + kernel_object_file->GetBaseAddress().GetFileAddress(); + if (m_load_address != LLDB_INVALID_ADDRESS && + file_address != 
LLDB_INVALID_ADDRESS) { + s.Printf("Kernel slide 0x%" PRIx64 " in memory.\n", + m_load_address - file_address); + s.Printf("Loaded kernel file %s\n", + m_module_sp->GetFileSpec().GetPath().c_str()); + } + } + s.Flush(); + } + + return IsLoaded(); +} + +// This function is work for kernel file, others it wil reset load address and +// return false +bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingFileAddress( + lldb_private::Process *process) { + if (IsLoaded()) + return true; + + if (m_module_sp) { + bool changed = false; + if (m_module_sp->SetLoadAddress(process->GetTarget(), 0, true, changed)) + m_stop_id = process->GetStopID(); + } + + return false; +} + +// Get the head of found_list +bool DynamicLoaderFreeBSDKernel::ReadKmodsListHeader() { + std::lock_guard guard(m_mutex); + + if (m_linker_file_list_struct_addr.IsValid()) { + // Get tqh_first struct element from linker_files + Status error; + addr_t address = m_process->ReadPointerFromMemory( + m_linker_file_list_struct_addr.GetLoadAddress(&m_process->GetTarget()), + error); + if (address != LLDB_INVALID_ADDRESS && error.Success()) { + m_linker_file_head_addr = Address(address); + } else { + m_linker_file_list_struct_addr.Clear(); + return false; + } + + if (!m_linker_file_head_addr.IsValid() || + m_linker_file_head_addr.GetFileAddress() == 0) { + m_linker_file_list_struct_addr.Clear(); + return false; + } + } + return true; +} + +// Parse Kmod info in found_list +bool DynamicLoaderFreeBSDKernel::ParseKmods(Address linker_files_head_addr) { + std::lock_guard guard(m_mutex); + KModImageInfo::collection_type linker_files_list; + Log *log = GetLog(LLDBLog::DynamicLoader); + + if (!ReadAllKmods(linker_files_head_addr, linker_files_list)) + return false; + LLDB_LOGF( + log, + "Kmod-changed breakpoint hit, there are %lu kernel modules currently.\n", + linker_files_list.size()); + + ModuleList &modules = m_process->GetTarget().GetImages(); + ModuleList remove_modules; + ModuleList add_modules; + + for (ModuleSP module : modules.Modules()) { + if (is_kernel(module.get())) + continue; + if (is_kmod(module.get())) + remove_modules.AppendIfNeeded(module); + } + + m_process->GetTarget().ModulesDidUnload(remove_modules, false); + + for (KModImageInfo &image_info : linker_files_list) { + if (m_kld_name_to_uuid.find(image_info.GetName()) != + m_kld_name_to_uuid.end()) + image_info.SetUUID(m_kld_name_to_uuid[image_info.GetName()]); + bool failed_to_load = false; + if (!image_info.LoadImageUsingMemoryModule(m_process)) { + image_info.LoadImageUsingFileAddress(m_process); + failed_to_load = true; + } else { + m_linker_files_list.push_back(image_info); + m_kld_name_to_uuid[image_info.GetName()] = image_info.GetUUID(); + } + + if (!failed_to_load) + add_modules.AppendIfNeeded(image_info.GetModule()); + } + m_process->GetTarget().ModulesDidLoad(add_modules); + return true; +} + +// Read all kmod from a given arrays of list +bool DynamicLoaderFreeBSDKernel::ReadAllKmods( + Address linker_files_head_addr, + KModImageInfo::collection_type &kmods_list) { + + // Get offset of next member and load address symbol + static ConstString kld_off_address_symbol_name("kld_off_address"); + static ConstString kld_off_next_symbol_name("kld_off_next"); + static ConstString kld_off_filename_symbol_name("kld_off_filename"); + static ConstString kld_off_pathname_symbol_name("kld_off_pathname"); + const Symbol *kld_off_address_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_address_symbol_name, eSymbolTypeData); + const 
Symbol *kld_off_next_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_next_symbol_name, eSymbolTypeData); + const Symbol *kld_off_filename_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_filename_symbol_name, eSymbolTypeData); + const Symbol *kld_off_pathname_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_pathname_symbol_name, eSymbolTypeData); + + if (!kld_off_address_symbol || !kld_off_next_symbol || + !kld_off_filename_symbol || !kld_off_pathname_symbol) + return false; + + Status error; + const int32_t kld_off_address = m_process->ReadSignedIntegerFromMemory( + kld_off_address_symbol->GetAddress().GetLoadAddress( + &m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + const int32_t kld_off_next = m_process->ReadSignedIntegerFromMemory( + kld_off_next_symbol->GetAddress().GetLoadAddress(&m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + const int32_t kld_off_filename = m_process->ReadSignedIntegerFromMemory( + kld_off_filename_symbol->GetAddress().GetLoadAddress( + &m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + + const int32_t kld_off_pathname = m_process->ReadSignedIntegerFromMemory( + kld_off_pathname_symbol->GetAddress().GetLoadAddress( + &m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + + // Parse KMods + addr_t kld_load_addr(LLDB_INVALID_ADDRESS); + char kld_filename[255]; + char kld_pathname[255]; + addr_t current_kld = + linker_files_head_addr.GetLoadAddress(&m_process->GetTarget()); + + while (current_kld != 0) { + addr_t kld_filename_addr = + m_process->ReadPointerFromMemory(current_kld + kld_off_filename, error); + if (error.Fail()) + return false; + addr_t kld_pathname_addr = + m_process->ReadPointerFromMemory(current_kld + kld_off_pathname, error); + if (error.Fail()) + return false; + + m_process->ReadCStringFromMemory(kld_filename_addr, kld_filename, + sizeof(kld_filename), error); + if (error.Fail()) + return false; + m_process->ReadCStringFromMemory(kld_pathname_addr, kld_pathname, + sizeof(kld_pathname), error); + if (error.Fail()) + return false; + kld_load_addr = + m_process->ReadPointerFromMemory(current_kld + kld_off_address, error); + if (error.Fail()) + return false; + + kmods_list.emplace_back(); + KModImageInfo &kmod_info = kmods_list.back(); + kmod_info.SetName(kld_filename); + kmod_info.SetLoadAddress(kld_load_addr); + kmod_info.SetPath(kld_pathname); + + current_kld = + m_process->ReadPointerFromMemory(current_kld + kld_off_next, error); + if (kmod_info.GetName() == "kernel") + kmods_list.pop_back(); + if (error.Fail()) + return false; + } + + return true; +} + +// Read all kmods +void DynamicLoaderFreeBSDKernel::ReadAllKmods() { + std::lock_guard guard(m_mutex); + + if (ReadKmodsListHeader()) { + if (m_linker_file_head_addr.IsValid()) { + if (!ParseKmods(m_linker_file_head_addr)) + m_linker_files_list.clear(); + } + } +} + +// Load all Kernel Modules +void DynamicLoaderFreeBSDKernel::LoadKernelModules() { + Log *log = GetLog(LLDBLog::DynamicLoader); + LLDB_LOGF(log, "DynamicLoaderFreeBSDKernel::LoadKernelModules " + "Start loading Kernel Module"); + + // Initialize Kernel Image Information at the first time + if (m_kernel_image_info.GetLoadAddress() == LLDB_INVALID_ADDRESS) { + ModuleSP module_sp = m_process->GetTarget().GetExecutableModule(); + if (is_kernel(module_sp.get())) { + m_kernel_image_info.SetModule(module_sp); + 
m_kernel_image_info.SetIsKernel(true); + } + + // Set name for kernel + llvm::StringRef kernel_name("freebsd_kernel"); + module_sp = m_kernel_image_info.GetModule(); + if (module_sp.get() && module_sp->GetObjectFile() && + !module_sp->GetObjectFile()->GetFileSpec().GetFilename().IsEmpty()) + kernel_name = module_sp->GetObjectFile() + ->GetFileSpec() + .GetFilename() + .GetStringRef(); + m_kernel_image_info.SetName(kernel_name.data()); + + if (m_kernel_image_info.GetLoadAddress() == LLDB_INVALID_ADDRESS) { + m_kernel_image_info.SetLoadAddress(m_kernel_load_address); + } + + // Build the in-memory module + if (m_kernel_image_info.GetLoadAddress() != LLDB_INVALID_ADDRESS) { + // If the kernel is not loaded in memory, fall back to loading it from file + if (!m_kernel_image_info.LoadImageUsingMemoryModule(m_process)) + m_kernel_image_info.LoadImageUsingFileAddress(m_process); + } + } + + LoadOperatingSystemPlugin(false); + + if (!m_kernel_image_info.IsLoaded() || !m_kernel_image_info.GetModule()) { + m_kernel_image_info.Clear(); + return; + } + + static ConstString modlist_symbol_name("linker_files"); + + const Symbol *symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + modlist_symbol_name, lldb::eSymbolTypeData); + + if (symbol) { + m_linker_file_list_struct_addr = symbol->GetAddress(); + ReadAllKmods(); + } else { + LLDB_LOGF(log, "DynamicLoaderFreeBSDKernel::LoadKernelModules " + "cannot find modlist symbol"); + } +} + +// Update symbols when kldload is used, by setting a callback function on kldload +void DynamicLoaderFreeBSDKernel::SetNotificationBreakPoint() {} + +// Hook called when attaching to a process +void DynamicLoaderFreeBSDKernel::DidAttach() { + PrivateInitialize(m_process); + Update(); +} + +// Hook called when launching a process +void DynamicLoaderFreeBSDKernel::DidLaunch() { + PrivateInitialize(m_process); + Update(); +} + +// Clear all members except the kernel address +void DynamicLoaderFreeBSDKernel::Clear(bool clear_process) { + std::lock_guard guard(m_mutex); + if (clear_process) + m_process = nullptr; + m_linker_file_head_addr.Clear(); + m_linker_file_list_struct_addr.Clear(); + m_kernel_image_info.Clear(); + m_linker_files_list.clear(); +} + +// Reinitialize class +void DynamicLoaderFreeBSDKernel::PrivateInitialize(Process *process) { + Clear(true); + m_process = process; +} + +ThreadPlanSP DynamicLoaderFreeBSDKernel::GetStepThroughTrampolinePlan( + lldb_private::Thread &thread, bool stop_others) { + Log *log = GetLog(LLDBLog::Step); + LLDB_LOGF(log, "DynamicLoaderFreeBSDKernel::GetStepThroughTrampolinePlan is " + "not yet implemented."); + return {}; +} + +Status DynamicLoaderFreeBSDKernel::CanLoadImage() { + Status error("shared object cannot be loaded into kernel"); + return error; +} diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h new file mode 100644 index 000000000000000..d8656e9c49dfe25 --- /dev/null +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h @@ -0,0 +1,171 @@ +//===-- DynamicLoaderFreeBSDKernel.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_DYNAMICLOADER_FREEBSD_KERNEL_DYNAMICLOADERFREEBSDKERNEL_H +#define LLDB_SOURCE_PLUGINS_DYNAMICLOADER_FREEBSD_KERNEL_DYNAMICLOADERFREEBSDKERNEL_H + +#include +#include +#include + +#include "lldb/Target/DynamicLoader.h" +#include "lldb/Target/Process.h" +#include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/UUID.h" +#include "llvm/BinaryFormat/ELF.h" + +class DynamicLoaderFreeBSDKernel : public lldb_private::DynamicLoader { +public: + DynamicLoaderFreeBSDKernel(lldb_private::Process *process, + lldb::addr_t kernel_addr); + + ~DynamicLoaderFreeBSDKernel() override; + + // Static Functions + + static void Initialize(); + + static void Terminate(); + + static llvm::StringRef GetPluginNameStatic() { return "freebsd-kernel"; } + + static llvm::StringRef GetPluginDescriptionStatic(); + + static lldb_private::DynamicLoader * + CreateInstance(lldb_private::Process *process, bool force); + + static void DebuggerInit(lldb_private::Debugger &debugger); + + static lldb::addr_t FindFreeBSDKernel(lldb_private::Process *process); + + // Hooks for time point that after attach to some proccess + void DidAttach() override; + + void DidLaunch() override; + + lldb::ThreadPlanSP GetStepThroughTrampolinePlan(lldb_private::Thread &thread, + bool stop_others) override; + + lldb_private::Status CanLoadImage() override; + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + +protected: + class KModImageInfo { + public: + KModImageInfo() + : m_module_sp(), m_memory_module_sp(), m_uuid(), m_name(), m_path() {} + + void Clear() { + m_load_address = LLDB_INVALID_ADDRESS; + m_name.clear(); + m_uuid.Clear(); + m_module_sp.reset(); + m_memory_module_sp.reset(); + m_stop_id = UINT32_MAX; + m_path.clear(); + } + + void SetLoadAddress(lldb::addr_t load_address) { + m_load_address = load_address; + } + + lldb::addr_t GetLoadAddress() const { return m_load_address; } + + void SetUUID(const lldb_private::UUID uuid) { m_uuid = uuid; } + + lldb_private::UUID GetUUID() const { return m_uuid; } + + void SetName(const char *name) { m_name = name; } + + std::string GetName() const { return m_name; } + + void SetPath(const char *path) { m_path = path; } + + std::string GetPath() const { return m_path; } + + void SetModule(lldb::ModuleSP module) { m_module_sp = module; } + + lldb::ModuleSP GetModule() { return m_module_sp; } + + void SetIsKernel(bool is_kernel) { m_is_kernel = is_kernel; } + + bool IsKernel() const { return m_is_kernel; }; + + void SetStopID(uint32_t stop_id) { m_stop_id = stop_id; } + + uint32_t GetStopID() { return m_stop_id; } + + bool IsLoaded() const { return m_stop_id != UINT32_MAX; }; + + bool ReadMemoryModule(lldb_private::Process *process); + + bool LoadImageUsingMemoryModule(lldb_private::Process *process); + + bool LoadImageUsingFileAddress(lldb_private::Process *process); + + using collection_type = std::vector; + + private: + lldb::ModuleSP m_module_sp; + lldb::ModuleSP m_memory_module_sp; + lldb::addr_t m_load_address = LLDB_INVALID_ADDRESS; + lldb_private::UUID m_uuid; + bool m_is_kernel = false; + std::string m_name; + std::string m_path; + uint32_t m_stop_id = UINT32_MAX; + }; + + void PrivateInitialize(lldb_private::Process *process); + + void Clear(bool clear_process); + + void Update(); + + void LoadKernelModules(); + + void ReadAllKmods(); + + bool ReadAllKmods(lldb_private::Address 
linker_files_head_address, + KModImageInfo::collection_type &kmods_list); + + bool ReadKmodsListHeader(); + + bool ParseKmods(lldb_private::Address linker_files_head_address); + + void SetNotificationBreakPoint(); + + static lldb_private::UUID + CheckForKernelImageAtAddress(lldb_private::Process *process, + lldb::addr_t address, + bool *read_error = nullptr); + + static lldb::addr_t FindKernelAtLoadAddress(lldb_private::Process *process); + + static bool ReadELFHeader(lldb_private::Process *process, + lldb::addr_t address, llvm::ELF::Elf32_Ehdr &header, + bool *read_error = nullptr); + + lldb_private::Process *m_process; + lldb_private::Address m_linker_file_list_struct_addr; + lldb_private::Address m_linker_file_head_addr; + lldb::addr_t m_kernel_load_address; + KModImageInfo m_kernel_image_info; + KModImageInfo::collection_type m_linker_files_list; + std::recursive_mutex m_mutex; + std::unordered_map m_kld_name_to_uuid; + +private: + DynamicLoaderFreeBSDKernel(const DynamicLoaderFreeBSDKernel &) = delete; + + const DynamicLoaderFreeBSDKernel & + operator=(const DynamicLoaderFreeBSDKernel &) = delete; +}; + +#endif diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 2da971dff895b4a..43ab87f08e19251 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -935,6 +935,16 @@ lldb_private::Address ObjectFileELF::GetEntryPointAddress() { } Address ObjectFileELF::GetBaseAddress() { + if (GetType() == ObjectFile::eTypeObjectFile) { + for (SectionHeaderCollIter I = std::next(m_section_headers.begin()); + I != m_section_headers.end(); ++I) { + const ELFSectionHeaderInfo &header = *I; + if (header.sh_flags & SHF_ALLOC) + return Address(GetSectionList()->FindSectionByID(SectionIndex(I)), 0); + } + return LLDB_INVALID_ADDRESS; + } + for (const auto &EnumPHdr : llvm::enumerate(ProgramHeaders())) { const ELFProgramHeader &H = EnumPHdr.value(); if (H.p_type != PT_LOAD) @@ -1764,7 +1774,12 @@ class VMAddressProvider { VMRange GetVMRange(const ELFSectionHeader &H) { addr_t Address = H.sh_addr; addr_t Size = H.sh_flags & SHF_ALLOC ? H.sh_size : 0; - if (ObjectType == ObjectFile::Type::eTypeObjectFile && Segments.empty() && (H.sh_flags & SHF_ALLOC)) { + + // When this is a debug file for relocatable file, the address is all zero + // and thus needs to use accumulate method + if ((ObjectType == ObjectFile::Type::eTypeObjectFile || + (ObjectType == ObjectFile::Type::eTypeDebugInfo && H.sh_addr == 0)) && + Segments.empty() && (H.sh_flags & SHF_ALLOC)) { NextVMAddress = llvm::alignTo(NextVMAddress, std::max(H.sh_addralign, 1)); Address = NextVMAddress; @@ -3454,10 +3469,28 @@ ObjectFile::Strata ObjectFileELF::CalculateStrata() { case llvm::ELF::ET_EXEC: // 2 - Executable file - // TODO: is there any way to detect that an executable is a kernel - // related executable by inspecting the program headers, section headers, - // symbols, or any other flag bits??? 
- return eStrataUser; + { + SectionList *section_list = GetSectionList(); + if (section_list) { + static ConstString loader_section_name(".interp"); + SectionSP loader_section = + section_list->FindSectionByName(loader_section_name); + if (loader_section) { + char buffer[256]; + size_t read_size = + ReadSectionData(loader_section.get(), 0, buffer, sizeof(buffer)); + + // We compare the content of .interp section + // It will contains \0 when counting read_size, so the size needs to + // decrease by one + llvm::StringRef loader_name(buffer, read_size - 1); + llvm::StringRef freebsd_kernel_loader_name("/red/herring"); + if (loader_name.equals(freebsd_kernel_loader_name)) + return eStrataKernel; + } + } + return eStrataUser; + } case llvm::ELF::ET_DYN: // 3 - Shared object file diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp index e3707365a9c3f19..601f5df43dbba4e 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp +++ b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp @@ -10,7 +10,7 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Target/DynamicLoader.h" -#include "Plugins/DynamicLoader/Static/DynamicLoaderStatic.h" +#include "Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h" #include "ProcessFreeBSDKernel.h" #include "ThreadFreeBSDKernel.h" @@ -262,7 +262,7 @@ Status ProcessFreeBSDKernel::DoLoadCore() { DynamicLoader *ProcessFreeBSDKernel::GetDynamicLoader() { if (m_dyld_up.get() == nullptr) m_dyld_up.reset(DynamicLoader::FindPlugin( - this, DynamicLoaderStatic::GetPluginNameStatic())); + this, DynamicLoaderFreeBSDKernel::GetPluginNameStatic())); return m_dyld_up.get(); } From lldb-commits at lists.llvm.org Tue Oct 3 07:45:22 2023 From: lldb-commits at lists.llvm.org (Jay Foad via lldb-commits) Date: Tue, 03 Oct 2023 07:45:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651c2902.170a0220.b190d.3988@mx.google.com> jayfoad wrote: I've just tested this on 10000 graphics shaders and it seems to make no difference at all. I tried gfx900 and gfx1100. Can anyone else from the graphics team confirm this? https://github.com/llvm/llvm-project/pull/67878 From lldb-commits at lists.llvm.org Tue Oct 3 08:03:35 2023 From: lldb-commits at lists.llvm.org (Joe Nash via lldb-commits) Date: Tue, 03 Oct 2023 08:03:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651c2d47.170a0220.6c294.3d83@mx.google.com> Sisyph wrote: > I've just tested this on 10000 graphics shaders and it seems to make no difference at all. I tried gfx900 and gfx1100. Can anyone else from the graphics team confirm this? 
I can confirm no difference on gfx1102 https://github.com/llvm/llvm-project/pull/67878 From lldb-commits at lists.llvm.org Tue Oct 3 08:38:06 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 08:38:06 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651c355e.170a0220.e5950.3cef@mx.google.com> ================ @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, ---------------- JDevlieghere wrote: - `attach_info` cannot be const because `Platform::Attach` also takes a non-const `AttachInfo`. There's at least one place (`PlatformPOSIX::Attach`) that modifies the `attach_info`. - `target` is an out parameter so making that const would be confusing. https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Tue Oct 3 08:41:52 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 08:41:52 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651c3640.050a0220.c2216.3a7f@mx.google.com> https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/68050 >From c83435474699ba6ca5ff57bcb1dacaef0987f4b4 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 3 Oct 2023 08:41:01 -0700 Subject: [PATCH] [lldb] Expose Platform::Attach through the SB API Expose Platform::Attach through the SB API. rdar://116188959 --- lldb/include/lldb/API/SBAttachInfo.h | 1 + lldb/include/lldb/API/SBDebugger.h | 1 + lldb/include/lldb/API/SBPlatform.h | 5 ++ lldb/include/lldb/API/SBProcess.h | 1 + .../Python/lldbsuite/test/gdbclientutils.py | 6 ++ lldb/source/API/SBPlatform.cpp | 25 ++++++++ .../gdb_remote_client/TestPlatformAttach.py | 58 +++++++++++++++++++ 7 files changed, 97 insertions(+) create mode 100644 lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py diff --git a/lldb/include/lldb/API/SBAttachInfo.h b/lldb/include/lldb/API/SBAttachInfo.h index ea1145e625856f0..c18655fee77e0ac 100644 --- a/lldb/include/lldb/API/SBAttachInfo.h +++ b/lldb/include/lldb/API/SBAttachInfo.h @@ -197,6 +197,7 @@ class LLDB_API SBAttachInfo { protected: friend class SBTarget; + friend class SBPlatform; friend class lldb_private::ScriptInterpreter; diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 29cf2c16fad4bd7..218113a7a391f35 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -487,6 +487,7 @@ class LLDB_API SBDebugger { friend class SBProcess; friend class SBSourceManager; friend class SBStructuredData; + friend class SBPlatform; friend class SBTarget; friend class SBTrace; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index 6567277a5d161e7..e0acc7003a54bc3 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -10,6 +10,7 @@ #define LLDB_API_SBPLATFORM_H #include "lldb/API/SBDefines.h" +#include "lldb/API/SBProcess.h" #include @@ -18,6 +19,7 @@ struct PlatformShellCommand; namespace lldb { +class SBAttachInfo; class SBLaunchInfo; class LLDB_API SBPlatformConnectOptions { @@ -149,6 +151,9 @@ class LLDB_API SBPlatform { SBError Launch(SBLaunchInfo &launch_info); + SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger
&debugger, + SBTarget &target, SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 16527bb0291fcb4..8c1c81418f83d12 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -449,6 +449,7 @@ class LLDB_API SBProcess { friend class SBExecutionContext; friend class SBFunction; friend class SBModule; + friend class SBPlatform; friend class SBTarget; friend class SBThread; friend class SBValue; diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index a0104d36df8d903..1784487323ad6be 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -196,6 +196,9 @@ def respond(self, packet): return self.vFile(packet) if packet.startswith("vRun;"): return self.vRun(packet) + if packet.startswith("qLaunchGDBServer;"): + _, host = packet.partition(";")[2].split(":") + return self.qLaunchGDBServer(host) if packet.startswith("qLaunchSuccess"): return self.qLaunchSuccess() if packet.startswith("QEnvironment:"): @@ -329,6 +332,9 @@ def vFile(self, packet): def vRun(self, packet): return "" + def qLaunchGDBServer(self, host): + raise self.UnexpectedPacketException() + def qLaunchSuccess(self): return "" diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index f8300a5bab30e41..c31848fe04ea72c 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -7,12 +7,14 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBDebugger.h" #include "lldb/API/SBEnvironment.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" #include "lldb/Target/Platform.h" @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, + SBError &error) { + LLDB_INSTRUMENT_VA(this, attach_info, debugger, target, error); + + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessAttachInfo &info = attach_info.ref(); + Status status; + ProcessSP process_sp = platform_sp->Attach(info, debugger.ref(), + target.GetSP().get(), status); + error.SetError(status); + return SBProcess(process_sp); + } + + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py new file mode 100644 index 000000000000000..d62e86b2a3c1d20 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -0,0 +1,58 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformAttach(GDBRemoteTestBase): + @skipIfRemote + @expectedFailureAll(oslist=["windows"], 
bugnumber="llvm.org/pr52451") + def test_attach(self): + """Test attaching by name""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self, port): + MockGDBServerResponder.__init__(self) + self.port = port + + def qLaunchGDBServer(self, _): + return "pid:1337;port:{};".format(self.port) + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + class MyGDBResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + + def vAttach(self, _): + return "OK" + + self.server.responder = MyGDBResponder() + port = self.server._socket._server_socket.getsockname()[1] + + platform_socket = TCPServerSocket() + platform_server = MockGDBServer(platform_socket) + platform_server.responder = MyPlatformResponder(port) + platform_server.start() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + lldb.SBPlatformConnectOptions(platform_server.get_connect_url()) + ) + self.assertSuccess(error) + self.assertTrue(platform.IsConnected()) + + attach_info = lldb.SBAttachInfo() + attach_info.SetExecutable("foo") + + target = lldb.SBTarget() + process = platform.Attach(attach_info, self.dbg, target, error) + self.assertSuccess(error) + self.assertEqual(process.GetProcessID(), 95117) + + platform.DisconnectRemote() From lldb-commits at lists.llvm.org Tue Oct 3 08:47:16 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Tue, 03 Oct 2023 08:47:16 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651c3784.170a0220.6448d.3cf0@mx.google.com> https://github.com/bulbazord approved this pull request. Lgtm https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Tue Oct 3 09:45:10 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Tue, 03 Oct 2023 09:45:10 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix --persistent-result description (PR #68128) Message-ID: https://github.com/kastiglione created https://github.com/llvm/llvm-project/pull/68128 The default is not static, it depends on context. For `expression`, the default is true, but for `dwim-print`, the default is false. >From 098ae1d2a09a406792e32e56b029a8e8a970630e Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Tue, 3 Oct 2023 09:38:25 -0700 Subject: [PATCH] [lldb] Fix --persistent-result description The default is not static, it depends on context. For `expression`, the default is true, but for `dwim-print`, the default is false. --- lldb/source/Commands/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..cfdeaabfa007743 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -390,7 +390,7 @@ let Command = "expression" in { Arg<"Boolean">, Desc<"Persist expression result in a variable for subsequent use. " "Expression results will be labeled with $-prefixed variables, e.g. $0, " - "$1, etc. 
Defaults to true.">; + "$1, etc.">; } let Command = "frame diag" in { From lldb-commits at lists.llvm.org Tue Oct 3 09:46:20 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Tue, 03 Oct 2023 09:46:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix --persistent-result description (PR #68128) In-Reply-To: Message-ID: <651c455c.170a0220.3fee6.3c04@mx.google.com> https://github.com/kastiglione edited https://github.com/llvm/llvm-project/pull/68128 From lldb-commits at lists.llvm.org Tue Oct 3 09:46:23 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 09:46:23 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix --persistent-result description (PR #68128) In-Reply-To: Message-ID: <651c455f.650a0220.e7737.2f8f@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes The default is not static, it depends on context. For `expression`, the default is true, but for `dwim-print`, the default is false. rdar://116320377 --- Full diff: https://github.com/llvm/llvm-project/pull/68128.diff 1 Files Affected: - (modified) lldb/source/Commands/Options.td (+1-1) ``````````diff diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..cfdeaabfa007743 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -390,7 +390,7 @@ let Command = "expression" in { Arg<"Boolean">, Desc<"Persist expression result in a variable for subsequent use. " "Expression results will be labeled with $-prefixed variables, e.g. $0, " - "$1, etc. Defaults to true.">; + "$1, etc.">; } let Command = "frame diag" in { ``````````
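For a concrete feel of the context-dependent default described above, a session might look like this (an illustrative sketch; the exact prompt and value output can differ):

``````````
(lldb) expression 40 + 2
(int) $0 = 42
(lldb) dwim-print 40 + 2
(int) 42
(lldb) dwim-print --persistent-result on -- 40 + 2
(int) $1 = 42
``````````

`expression` persists its result ($0) by default, `dwim-print` does not, and `--persistent-result` overrides the default in either command.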
https://github.com/llvm/llvm-project/pull/68128 From lldb-commits at lists.llvm.org Tue Oct 3 09:50:29 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 09:50:29 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651c4655.170a0220.49483.4460@mx.google.com> aokblast wrote: I try to change the committer of my patch and I think it works now. https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Tue Oct 3 10:17:13 2023 From: lldb-commits at lists.llvm.org (David Stone via lldb-commits) Date: Tue, 03 Oct 2023 10:17:13 -0700 (PDT) Subject: [Lldb-commits] [lldb] [clang][Modules] Move `ASTSourceDescriptor` into its own file (PR #67930) In-Reply-To: Message-ID: <651c4c99.170a0220.48ccb.4bbc@mx.google.com> https://github.com/davidstone updated https://github.com/llvm/llvm-project/pull/67930 >From 3cb09af262cf53cecb73e2da342a7891bc9fe853 Mon Sep 17 00:00:00 2001 From: David Stone Date: Sun, 1 Oct 2023 12:02:48 -0600 Subject: [PATCH] [clang][Modules] Move `ASTSourceDescriptor` into its own file --- .../include/clang/Basic/ASTSourceDescriptor.h | 52 +++++++++++++++++++ clang/include/clang/Basic/Module.h | 26 ---------- clang/lib/AST/ExternalASTSource.cpp | 2 +- clang/lib/Basic/ASTSourceDescriptor.cpp | 33 ++++++++++++ clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/Module.cpp | 15 ------ clang/lib/CodeGen/CGDebugInfo.h | 3 +- clang/lib/Serialization/ASTReader.cpp | 1 + .../Plugins/ExpressionParser/Clang/ASTUtils.h | 8 ++- .../Clang/ClangExternalASTSourceCallbacks.cpp | 1 + .../Clang/ClangExternalASTSourceCallbacks.h | 8 ++- 11 files changed, 105 insertions(+), 45 deletions(-) create mode 100644 clang/include/clang/Basic/ASTSourceDescriptor.h create mode 100644 clang/lib/Basic/ASTSourceDescriptor.cpp diff --git a/clang/include/clang/Basic/ASTSourceDescriptor.h b/clang/include/clang/Basic/ASTSourceDescriptor.h new file mode 100644 index 000000000000000..175e0551db76562 --- /dev/null +++ b/clang/include/clang/Basic/ASTSourceDescriptor.h @@ -0,0 +1,52 @@ +//===- ASTSourceDescriptor.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Defines the clang::ASTSourceDescriptor class, which abstracts clang modules +/// and precompiled header files +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_ASTSOURCEDESCRIPTOR_H +#define LLVM_CLANG_BASIC_ASTSOURCEDESCRIPTOR_H + +#include "clang/Basic/Module.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace clang { + +/// Abstracts clang modules and precompiled header files and holds +/// everything needed to generate debug info for an imported module +/// or PCH. 
+class ASTSourceDescriptor { + StringRef PCHModuleName; + StringRef Path; + StringRef ASTFile; + ASTFileSignature Signature; + Module *ClangModule = nullptr; + +public: + ASTSourceDescriptor() = default; + ASTSourceDescriptor(StringRef Name, StringRef Path, StringRef ASTFile, + ASTFileSignature Signature) + : PCHModuleName(std::move(Name)), Path(std::move(Path)), + ASTFile(std::move(ASTFile)), Signature(Signature) {} + ASTSourceDescriptor(Module &M); + + std::string getModuleName() const; + StringRef getPath() const { return Path; } + StringRef getASTFile() const { return ASTFile; } + ASTFileSignature getSignature() const { return Signature; } + Module *getModuleOrNull() const { return ClangModule; } +}; + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_ASTSOURCEDESCRIPTOR_H diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 676fd372493a3aa..60381472bbd4e59 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -850,32 +850,6 @@ class VisibleModuleSet { unsigned Generation = 0; }; -/// Abstracts clang modules and precompiled header files and holds -/// everything needed to generate debug info for an imported module -/// or PCH. -class ASTSourceDescriptor { - StringRef PCHModuleName; - StringRef Path; - StringRef ASTFile; - ASTFileSignature Signature; - Module *ClangModule = nullptr; - -public: - ASTSourceDescriptor() = default; - ASTSourceDescriptor(StringRef Name, StringRef Path, StringRef ASTFile, - ASTFileSignature Signature) - : PCHModuleName(std::move(Name)), Path(std::move(Path)), - ASTFile(std::move(ASTFile)), Signature(Signature) {} - ASTSourceDescriptor(Module &M); - - std::string getModuleName() const; - StringRef getPath() const { return Path; } - StringRef getASTFile() const { return ASTFile; } - ASTFileSignature getSignature() const { return Signature; } - Module *getModuleOrNull() const { return ClangModule; } -}; - - } // namespace clang #endif // LLVM_CLANG_BASIC_MODULE_H diff --git a/clang/lib/AST/ExternalASTSource.cpp b/clang/lib/AST/ExternalASTSource.cpp index 090ef02aa4224d6..00bc6b4b919abf7 100644 --- a/clang/lib/AST/ExternalASTSource.cpp +++ b/clang/lib/AST/ExternalASTSource.cpp @@ -15,10 +15,10 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclarationName.h" +#include "clang/Basic/ASTSourceDescriptor.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" -#include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/clang/lib/Basic/ASTSourceDescriptor.cpp b/clang/lib/Basic/ASTSourceDescriptor.cpp new file mode 100644 index 000000000000000..8072c08a51d3a3b --- /dev/null +++ b/clang/lib/Basic/ASTSourceDescriptor.cpp @@ -0,0 +1,33 @@ +//===- ASTSourceDescriptor.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Defines the clang::ASTSourceDescriptor class, which abstracts clang modules +/// and precompiled header files +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/ASTSourceDescriptor.h" + +namespace clang { + +ASTSourceDescriptor::ASTSourceDescriptor(Module &M) + : Signature(M.Signature), ClangModule(&M) { + if (M.Directory) + Path = M.Directory->getName(); + if (auto File = M.getASTFile()) + ASTFile = File->getName(); +} + +std::string ASTSourceDescriptor::getModuleName() const { + if (ClangModule) + return ClangModule->Name; + else + return std::string(PCHModuleName); +} + +} // namespace clang diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 36ccf7d0809453e..70a4f74f2d8edd8 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -54,6 +54,7 @@ if(CLANG_VENDOR) endif() add_clang_library(clangBasic + ASTSourceDescriptor.cpp Attributes.cpp Builtins.cpp CLWarnings.cpp diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index 0455304ef7f2b1a..71e2024d4635475 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -725,18 +725,3 @@ void VisibleModuleSet::makeTransitiveImportsVisible(Module *M, for (auto *I : M->Imports) setVisible(I, Loc, Vis, Cb); } - -ASTSourceDescriptor::ASTSourceDescriptor(Module &M) - : Signature(M.Signature), ClangModule(&M) { - if (M.Directory) - Path = M.Directory->getName(); - if (auto File = M.getASTFile()) - ASTFile = File->getName(); -} - -std::string ASTSourceDescriptor::getModuleName() const { - if (ClangModule) - return ClangModule->Name; - else - return std::string(PCHModuleName); -} diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index ae12485850ca775..32478ef225d6232 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -20,8 +20,8 @@ #include "clang/AST/PrettyPrinter.h" #include "clang/AST/Type.h" #include "clang/AST/TypeOrdering.h" +#include "clang/Basic/ASTSourceDescriptor.h" #include "clang/Basic/CodeGenOptions.h" -#include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -38,6 +38,7 @@ class MDNode; namespace clang { class ClassTemplateSpecializationDecl; class GlobalDecl; +class Module; class ModuleMap; class ObjCInterfaceDecl; class UsingDecl; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0c9c5992c267b39..82ef77f99308217 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -40,6 +40,7 @@ #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeLocVisitor.h" #include "clang/AST/UnresolvedSet.h" +#include "clang/Basic/ASTSourceDescriptor.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticError.h" diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h index 95e8a600f838243..92cbcd02d3ee2ba 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h @@ -9,13 +9,19 @@ #ifndef LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_ASTUTILS_H #define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_ASTUTILS_H -#include "clang/Basic/Module.h" 
+#include "clang/Basic/ASTSourceDescriptor.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/MultiplexExternalSemaSource.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaConsumer.h" #include +namespace clang { + +class Module; + +} // namespace clang + namespace lldb_private { /// Wraps an ExternalASTSource into an ExternalSemaSource. Doesn't take diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.cpp index 89d9ac042e57a31..e746e6afe39bead 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.cpp @@ -11,6 +11,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" +#include "clang/Basic/Module.h" #include using namespace lldb_private; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h index 219ed641615eb23..6bd18186a567d97 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h @@ -10,9 +10,15 @@ #define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGEXTERNALASTSOURCECALLBACKS_H #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" -#include "clang/Basic/Module.h" +#include "clang/Basic/ASTSourceDescriptor.h" #include +namespace clang { + +class Module; + +} // namespace clang + namespace lldb_private { class ClangExternalASTSourceCallbacks : public clang::ExternalASTSource { From lldb-commits at lists.llvm.org Tue Oct 3 10:19:59 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:19:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix --persistent-result description (PR #68128) In-Reply-To: Message-ID: <651c4d3f.170a0220.63cd2.4667@mx.google.com> https://github.com/JDevlieghere approved this pull request. https://github.com/llvm/llvm-project/pull/68128 From lldb-commits at lists.llvm.org Tue Oct 3 10:29:32 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:29:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651c4f7c.170a0220.1cc90.4c67@mx.google.com> =?utf-8?q?Jos=C3=A9?= L. Junior , =?utf-8?q?Jos=C3=A9?= L. Junior Message-ID: In-Reply-To: https://github.com/JDevlieghere requested changes to this pull request. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 10:29:32 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:29:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651c4f7c.170a0220.613b.4963@mx.google.com> =?utf-8?q?José?= L. Junior , =?utf-8?q?José?= L. Junior Message-ID: In-Reply-To: https://github.com/JDevlieghere edited https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 10:29:32 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:29:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651c4f7c.620a0220.12817.445e@mx.google.com> =?utf-8?q?José?= L. 
Junior , =?utf-8?q?José?= L. Junior Message-ID: In-Reply-To: ================ @@ -95,21 +95,24 @@ class Language : public PluginInterface { class EitherTypeScavenger : public TypeScavenger { public: EitherTypeScavenger() : TypeScavenger() { - for (std::shared_ptr scavenger : { std::shared_ptr(new ScavengerTypes())... }) { + for (std::shared_ptr scavenger : + {std::shared_ptr(new ScavengerTypes())...}) { ---------------- JDevlieghere wrote: Can this use `make_shared`? https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 10:29:34 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:29:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651c4f7e.170a0220.f0dc5.4cae@mx.google.com> =?utf-8?q?José?= L. Junior , =?utf-8?q?José?= L. Junior Message-ID: In-Reply-To: ================ @@ -38,7 +41,13 @@ Status CommandOptionsProcessLaunch::SetOptionValue( case 's': // Stop at program entry point launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; - + case 'm': // Stop at user entry point + { + TargetSP target_sp = + execution_context ? execution_context->GetTargetSP() : TargetSP(); ---------------- JDevlieghere wrote: There's another option below (`'a'`) that also needs the `target_sp`. Let's hoist this out of the switch and reuse it in all the options. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 10:29:34 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:29:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651c4f7e.a70a0220.dcb2.4507@mx.google.com> =?utf-8?q?José?= L. Junior , =?utf-8?q?José?= L. Junior Message-ID: In-Reply-To: ================ @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); ---------------- JDevlieghere wrote: The `error` is a local variable which nobody is reading. Should this be an out parameter? https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 10:32:24 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Tue, 03 Oct 2023 10:32:24 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651c5028.170a0220.41a70.4b17@mx.google.com> =?utf-8?q?José?= L. Junior , =?utf-8?q?José?= L. Junior Message-ID: In-Reply-To: ================ @@ -95,21 +95,24 @@ class Language : public PluginInterface { class EitherTypeScavenger : public TypeScavenger { public: EitherTypeScavenger() : TypeScavenger() { - for (std::shared_ptr scavenger : { std::shared_ptr(new ScavengerTypes())... 
}) { + for (std::shared_ptr scavenger : + {std::shared_ptr(new ScavengerTypes())...}) { ---------------- medismailben wrote: FWIW, these changes were not introduced by @junior-jl, they're probably the result of running `clang-format` on the whole file. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 10:33:12 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Tue, 03 Oct 2023 10:33:12 -0700 (PDT) Subject: [Lldb-commits] [lldb] dd76375 - [lldb][NFCI] Apply IndexEntry to DWARFUnitHeader outside of extraction Message-ID: <651c5058.a70a0220.444cb.4341@mx.google.com> Author: Alex Langford Date: 2023-10-03T10:27:18-07:00 New Revision: dd76375c80098be4d08b7e02290e39a8c1d00ab1 URL: https://github.com/llvm/llvm-project/commit/dd76375c80098be4d08b7e02290e39a8c1d00ab1 DIFF: https://github.com/llvm/llvm-project/commit/dd76375c80098be4d08b7e02290e39a8c1d00ab1.diff LOG: [lldb][NFCI] Apply IndexEntry to DWARFUnitHeader outside of extraction I plan on replacing LLDB's DWARFUnitHeader implementation with LLVM's. LLVM's DWARFUnitHeader::extract applies the DWARFUnitIndex::Entry to a given DWARFUnitHeader outside of the extraction because the index entry is only relevant to one place where we may parse DWARFUnitHeaders (specifically when we're creating a DWARFUnit in a DWO context). To ease the transition, I've reshaped LLDB's implementation to look closer to LLVM's. Reviewed By: aprantl, fdeazeve Differential Revision: https://reviews.llvm.org/D151919 Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h Removed: ################################################################################ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 45e37b42f5e9566..b51cf04c7b724e5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -877,11 +877,37 @@ const DWARFDebugAranges &DWARFUnit::GetFunctionAranges() { return *m_func_aranges_up; } -llvm::Expected -DWARFUnitHeader::extract(const DWARFDataExtractor &data, - DIERef::Section section, - lldb_private::DWARFContext &context, - lldb::offset_t *offset_ptr) { +llvm::Error DWARFUnitHeader::ApplyIndexEntry( + const llvm::DWARFUnitIndex::Entry *index_entry) { + // We should only be calling this function when the index entry is not set and + // we have a valid one to set it to. 
+ assert(index_entry); + assert(!m_index_entry); + + if (m_abbr_offset) + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "Package unit with a non-zero abbreviation offset"); + + auto *unit_contrib = index_entry->getContribution(); + if (!unit_contrib || unit_contrib->getLength32() != m_length + 4) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Inconsistent DWARF package unit index"); + + auto *abbr_entry = index_entry->getContribution(llvm::DW_SECT_ABBREV); + if (!abbr_entry) + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "DWARF package index missing abbreviation column"); + + m_abbr_offset = abbr_entry->getOffset(); + m_index_entry = index_entry; + return llvm::Error::success(); +} + +llvm::Expected DWARFUnitHeader::extract( + const DWARFDataExtractor &data, DIERef::Section section, + lldb_private::DWARFContext &context, lldb::offset_t *offset_ptr) { DWARFUnitHeader header; header.m_offset = *offset_ptr; header.m_length = data.GetDWARFInitialLength(offset_ptr); @@ -905,42 +931,6 @@ DWARFUnitHeader::extract(const DWARFDataExtractor &data, header.m_type_offset = data.GetDWARFOffset(offset_ptr); } - if (context.isDwo()) { - const llvm::DWARFUnitIndex *Index; - if (header.IsTypeUnit()) { - Index = &context.GetAsLLVM().getTUIndex(); - if (*Index) - header.m_index_entry = Index->getFromHash(header.m_type_hash); - } else { - Index = &context.GetAsLLVM().getCUIndex(); - if (*Index && header.m_version >= 5 && header.m_dwo_id) - header.m_index_entry = Index->getFromHash(*header.m_dwo_id); - } - if (!header.m_index_entry) - header.m_index_entry = Index->getFromOffset(header.m_offset); - } - - if (header.m_index_entry) { - if (header.m_abbr_offset) { - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "Package unit with a non-zero abbreviation offset"); - } - auto *unit_contrib = header.m_index_entry->getContribution(); - if (!unit_contrib || unit_contrib->getLength32() != header.m_length + 4) { - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "Inconsistent DWARF package unit index"); - } - auto *abbr_entry = - header.m_index_entry->getContribution(llvm::DW_SECT_ABBREV); - if (!abbr_entry) { - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "DWARF package index missing abbreviation column"); - } - header.m_abbr_offset = abbr_entry->getOffset(); - } - bool length_OK = data.ValidOffset(header.GetNextUnitOffset() - 1); bool version_OK = SymbolFileDWARF::SupportedVersion(header.m_version); bool addr_size_OK = (header.m_addr_size == 2) || (header.m_addr_size == 4) || @@ -970,11 +960,30 @@ DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid, DIERef::Section section, lldb::offset_t *offset_ptr) { assert(debug_info.ValidOffset(*offset_ptr)); - auto expected_header = DWARFUnitHeader::extract( - debug_info, section, dwarf.GetDWARFContext(), offset_ptr); + DWARFContext &context = dwarf.GetDWARFContext(); + auto expected_header = + DWARFUnitHeader::extract(debug_info, section, context, offset_ptr); if (!expected_header) return expected_header.takeError(); + if (context.isDwo()) { + const llvm::DWARFUnitIndex::Entry *entry = nullptr; + const llvm::DWARFUnitIndex &index = expected_header->IsTypeUnit() + ? 
context.GetAsLLVM().getTUIndex() + : context.GetAsLLVM().getCUIndex(); + if (index) { + if (expected_header->IsTypeUnit()) + entry = index.getFromHash(expected_header->GetTypeHash()); + else if (auto dwo_id = expected_header->GetDWOId()) + entry = index.getFromHash(*dwo_id); + } + if (!entry) + entry = index.getFromOffset(expected_header->GetOffset()); + if (entry) + if (llvm::Error err = expected_header->ApplyIndexEntry(entry)) + return err; + } + const llvm::DWARFDebugAbbrev *abbr = dwarf.DebugAbbrev(); + if (!abbr) + return llvm::make_error( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 004c01a37bb05e3..20871d805e77a87 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -76,6 +76,8 @@ class DWARFUnitHeader { } uint32_t GetNextUnitOffset() const { return m_offset + m_length + 4; } + llvm::Error ApplyIndexEntry(const llvm::DWARFUnitIndex::Entry *index_entry); + static llvm::Expected extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section, lldb_private::DWARFContext &dwarf_context, From lldb-commits at lists.llvm.org Tue Oct 3 10:33:24 2023 From: lldb-commits at lists.llvm.org (Alex Langford via Phabricator via lldb-commits) Date: Tue, 03 Oct 2023 17:33:24 +0000 (UTC) Subject: [Lldb-commits] [PATCH] D151919: [lldb][NFCI] Apply IndexEntry to DWARFUnitHeader outside of extraction In-Reply-To: References: Message-ID: <980p-QAuRRKS9rmfEb3IZw@geopod-ismtpd-15> This revision was automatically updated to reflect the committed changes. Closed by commit rGdd76375c8009: [lldb][NFCI] Apply IndexEntry to DWARFUnitHeader outside of extraction (authored by bulbazord). Changed prior to commit: https://reviews.llvm.org/D151919?vs=529769&id=557568#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D151919/new/ https://reviews.llvm.org/D151919 Files: lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h From lldb-commits at lists.llvm.org Tue Oct 3 10:34:05 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 10:34:05 -0700 (PDT) Subject: [Lldb-commits] [lldb] 2da99a1 - [lldb] Expose Platform::Attach through the SB API (#68050) Message-ID: <651c508d.170a0220.1288f.421c@mx.google.com> Author: Jonas Devlieghere Date: 2023-10-03T10:34:00-07:00 New Revision: 2da99a11196246ab5f9787117f01b2251480607a URL: https://github.com/llvm/llvm-project/commit/2da99a11196246ab5f9787117f01b2251480607a DIFF: https://github.com/llvm/llvm-project/commit/2da99a11196246ab5f9787117f01b2251480607a.diff LOG: [lldb] Expose Platform::Attach through the SB API (#68050) Expose Platform::Attach through the SB API.
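A minimal usage sketch of the new call — illustrative only; the platform name, the connection URL, and the in-scope SBDebugger are assumptions for the example, not part of the change:

```cpp
#include "lldb/API/SBAttachInfo.h"
#include "lldb/API/SBDebugger.h"
#include "lldb/API/SBError.h"
#include "lldb/API/SBPlatform.h"
#include "lldb/API/SBTarget.h"

// Sketch: attach to a remote process by executable name through a connected
// platform. Assumes a platform server is listening at the given URL.
lldb::SBProcess AttachByName(lldb::SBDebugger &debugger) {
  lldb::SBPlatform platform("remote-linux");
  lldb::SBPlatformConnectOptions options("connect://localhost:5432");
  lldb::SBError error = platform.ConnectRemote(options);
  if (error.Fail())
    return lldb::SBProcess();

  lldb::SBAttachInfo attach_info;
  attach_info.SetExecutable("foo");

  lldb::SBTarget target; // filled in by Attach on success
  return platform.Attach(attach_info, debugger, target, error);
}
```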
rdar://116188959 Added: lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py Modified: lldb/include/lldb/API/SBAttachInfo.h lldb/include/lldb/API/SBDebugger.h lldb/include/lldb/API/SBPlatform.h lldb/include/lldb/API/SBProcess.h lldb/packages/Python/lldbsuite/test/gdbclientutils.py lldb/source/API/SBPlatform.cpp Removed: ################################################################################ diff --git a/lldb/include/lldb/API/SBAttachInfo.h b/lldb/include/lldb/API/SBAttachInfo.h index ea1145e625856f0..c18655fee77e0ac 100644 --- a/lldb/include/lldb/API/SBAttachInfo.h +++ b/lldb/include/lldb/API/SBAttachInfo.h @@ -197,6 +197,7 @@ class LLDB_API SBAttachInfo { protected: friend class SBTarget; + friend class SBPlatform; friend class lldb_private::ScriptInterpreter; diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 29cf2c16fad4bd7..218113a7a391f35 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -487,6 +487,7 @@ class LLDB_API SBDebugger { friend class SBProcess; friend class SBSourceManager; friend class SBStructuredData; + friend class SBPlatform; friend class SBTarget; friend class SBTrace; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index 6567277a5d161e7..e0acc7003a54bc3 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -10,6 +10,7 @@ #define LLDB_API_SBPLATFORM_H #include "lldb/API/SBDefines.h" +#include "lldb/API/SBProcess.h" #include @@ -18,6 +19,7 @@ struct PlatformShellCommand; namespace lldb { +class SBAttachInfo; class SBLaunchInfo; class LLDB_API SBPlatformConnectOptions { @@ -149,6 +151,9 @@ class LLDB_API SBPlatform { SBError Launch(SBLaunchInfo &launch_info); + SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, + SBTarget &target, SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 16527bb0291fcb4..8c1c81418f83d12 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -449,6 +449,7 @@ class LLDB_API SBProcess { friend class SBExecutionContext; friend class SBFunction; friend class SBModule; + friend class SBPlatform; friend class SBTarget; friend class SBThread; friend class SBValue; diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index a0104d36df8d903..1784487323ad6be 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -196,6 +196,9 @@ def respond(self, packet): return self.vFile(packet) if packet.startswith("vRun;"): return self.vRun(packet) + if packet.startswith("qLaunchGDBServer;"): + _, host = packet.partition(";")[2].split(":") + return self.qLaunchGDBServer(host) if packet.startswith("qLaunchSuccess"): return self.qLaunchSuccess() if packet.startswith("QEnvironment:"): @@ -329,6 +332,9 @@ def vFile(self, packet): def vRun(self, packet): return "" + def qLaunchGDBServer(self, host): + raise self.UnexpectedPacketException() + def qLaunchSuccess(self): return "" diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index f8300a5bab30e41..c31848fe04ea72c 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -7,12 +7,14 @@ //===----------------------------------------------------------------------===// #include 
"lldb/API/SBPlatform.h" +#include "lldb/API/SBDebugger.h" #include "lldb/API/SBEnvironment.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" #include "lldb/Target/Platform.h" @@ -574,6 +576,29 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { }); } +SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, + const SBDebugger &debugger, SBTarget &target, + SBError &error) { + LLDB_INSTRUMENT_VA(this, attach_info, debugger, target, error); + + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessAttachInfo &info = attach_info.ref(); + Status status; + ProcessSP process_sp = platform_sp->Attach(info, debugger.ref(), + target.GetSP().get(), status); + error.SetError(status); + return SBProcess(process_sp); + } + + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py new file mode 100644 index 000000000000000..d62e86b2a3c1d20 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -0,0 +1,58 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformAttach(GDBRemoteTestBase): + @skipIfRemote + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr52451") + def test_attach(self): + """Test attaching by name""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self, port): + MockGDBServerResponder.__init__(self) + self.port = port + + def qLaunchGDBServer(self, _): + return "pid:1337;port:{};".format(self.port) + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + class MyGDBResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + + def vAttach(self, _): + return "OK" + + self.server.responder = MyGDBResponder() + port = self.server._socket._server_socket.getsockname()[1] + + platform_socket = TCPServerSocket() + platform_server = MockGDBServer(platform_socket) + platform_server.responder = MyPlatformResponder(port) + platform_server.start() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + lldb.SBPlatformConnectOptions(platform_server.get_connect_url()) + ) + self.assertSuccess(error) + self.assertTrue(platform.IsConnected()) + + attach_info = lldb.SBAttachInfo() + attach_info.SetExecutable("foo") + + target = lldb.SBTarget() + process = platform.Attach(attach_info, self.dbg, target, error) + self.assertSuccess(error) + self.assertEqual(process.GetProcessID(), 95117) + + platform.DisconnectRemote() From lldb-commits at lists.llvm.org Tue Oct 3 10:34:06 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Tue, 03 Oct 2023 10:34:06 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: 
<651c508e.050a0220.c38f1.4459@mx.google.com> https://github.com/JDevlieghere closed https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Tue Oct 3 10:54:32 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 10:54:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] Implement data formatters for LibStdC++ std::variant (PR #68012) In-Reply-To: Message-ID: <651c5558.170a0220.6e7ce.4a44@mx.google.com> jeffreytan81 wrote: Thanks for the heads-up. I have done some investigation. The test passes on my CentOS Linux and MacBook. It is unclear why it fails on these two bots. Unfortunately, I do not have the specific Ubuntu machines to reproduce the failure, and the logs do not provide enough clues. I will go ahead and draft a fix to disable the failing test lines to unblock. > Hello, It looks like this broke 2 bots: lldb-aarch64-ubuntu : https://lab.llvm.org/buildbot/#/builders/96/builds/46436 lldb-arm-ubuntu : https://lab.llvm.org/buildbot/#/builders/17/builds/44011 Could you please take a look ? https://github.com/llvm/llvm-project/pull/68012 From lldb-commits at lists.llvm.org Tue Oct 3 10:55:03 2023 From: lldb-commits at lists.llvm.org (José Lira Junior via lldb-commits) Date: Tue, 03 Oct 2023 10:55:03 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c5577.a70a0220.f923d.4a5b@mx.google.com> ================ @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); ---------------- junior-jl wrote: Oh, that's true. If I understood correctly, the method should be `lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry(Status &error)` and in `CommandOptionsProcessLaunch.cpp`, it should be called with `target_sp->CreateBreakpointAtUserEntry(error);`. Am I correct? https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 11:06:20 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:06:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651c581c.170a0220.40478.7c67@mx.google.com> shraiysh wrote: > Will the wrapper function stay or be removed? I would like to get it removed, because it is unnecessary (I did not realize this earlier while submitting the patch). But because nobody is actively reviewing #67723, I do not want to delay progress for the construct. So, I will keep updating that PR with updates to testcases while I wait on reviews. The functions should not need any further changes there.
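To make the out-parameter suggestion in junior-jl's comment above concrete, here is a minimal sketch of the proposed shape for PR #67019 — hypothetical code, not the actual patch; it assumes the declaration in Target.h is updated to match:

```cpp
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"

using namespace lldb_private;

// Hypothetical sketch: Status becomes an out-parameter so the caller can
// observe and report why breakpoint creation failed.
lldb::BreakpointSP Target::CreateBreakpointAtUserEntry(Status &error) {
  lldb::ModuleSP main_module_sp = GetExecutableModule();
  if (!main_module_sp) {
    error.SetErrorString("No executable module.");
    return lldb::BreakpointSP();
  }
  lldb::BreakpointSP bp_sp;
  // ... gather per-language entry-point names and create the breakpoint ...
  return bp_sp;
}

// Call-site sketch (e.g. in CommandOptionsProcessLaunch.cpp):
//   Status error;
//   lldb::BreakpointSP bp_sp = target_sp->CreateBreakpointAtUserEntry(error);
//   if (error.Fail())
//     /* surface error.AsCString() to the user */;
```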
https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Tue Oct 3 11:06:41 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:06:41 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651c5831.620a0220.dd0f8.44f5@mx.google.com> https://github.com/shraiysh updated https://github.com/llvm/llvm-project/pull/68042 >From c7c9e907d897ae667331761d8097ccb7852c5d93 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 16:43:13 -0500 Subject: [PATCH 1/2] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR This patch adds translation from `omp.teams` operation to LLVM IR using OpenMPIRBuilder. The clauses are not handled in this patch. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 +++ mlir/test/Target/LLVMIR/openmp-teams.mlir | 136 ++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/openmp-teams.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8f7f1963b3e5a4f..b9643be40e13c01 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -661,6 +661,24 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, return bodyGenStatus; } +// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder +static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult bodyGenStatus = success(); + if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + return op.emitError("unhandled clauses for translation to LLVM IR"); + } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ + LLVM::ModuleTranslation::SaveStack frame(moduleTranslation, allocaIP); + builder.restoreIP(codegenIP); + convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + }; + + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + return bodyGenStatus; +} + /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. 
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, @@ -2406,6 +2424,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::SingleOp op) { return convertOmpSingle(op, builder, moduleTranslation); }) + .Case([&](omp::TeamsOp op) { + return convertOmpTeams(op, builder, moduleTranslation); + }) .Case([&](omp::TaskOp op) { return convertOmpTaskOp(op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir new file mode 100644 index 000000000000000..c9005fca94a7c20 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -0,0 +1,136 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @foo() + +// CHECK-LABEL: @omp_teams_simple +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[wrapperfn:.+]]) +// CHECK: ret void +llvm.func @omp_teams_simple() { + omp.teams { + llvm.call @foo() : () -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[outlinedfn:.+]]() +// CHECK: call void @foo() +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr %[[global_tid:.+]], ptr %[[bound_tid:.+]]) +// CHECK: call void @[[outlinedfn]] +// CHECK: ret void + +// ----- + +llvm.func @foo(i32) -> () + +// CHECK-LABEL: @omp_teams_shared_simple +// CHECK-SAME: (i32 [[arg0:%.+]]) +// CHECK: [[structArg:%.+]] = alloca { i32 } +// CHECK: br +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: store i32 [[arg0]], ptr [[gep]] +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:.+]], ptr [[structArg]]) +// CHECK: ret void +llvm.func @omp_teams_shared_simple(%arg0: i32) { + omp.teams { + llvm.call @foo(%arg0) : (i32) -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void [[outlinedfn:@.+]](ptr [[structArg:%.+]]) +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: [[loadgep:%.+]] = load i32, ptr [[gep]] +// CHECK: call void @foo(i32 [[loadgep]]) +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr [[global_tid:.+]], ptr [[bound_tid:.+]], ptr [[structArg:.+]]) +// CHECK: call void [[outlinedfn]](ptr [[structArg]]) +// CHECK: ret void + +// ----- + +llvm.func @my_alloca_fn() -> !llvm.ptr +llvm.func @foo(i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () +llvm.func @bar() + +// CHECK-LABEL: @omp_teams_branching_shared +// CHECK-SAME: (i1 [[condition:%.+]], i32 [[arg0:%.+]], float [[arg1:%.+]], ptr [[arg2:%.+]], fp128 [[arg3:%.+]]) + +// Checking that the allocation for struct argument happens in the alloca block. +// CHECK: [[structArg:%.+]] = alloca { i1, i32, float, ptr, fp128, ptr, i32 } +// CHECK: [[allocated:%.+]] = call ptr @my_alloca_fn() +// CHECK: [[loaded:%.+]] = load i32, ptr [[allocated]] +// CHECK: br label + +// Checking that the shared values are stored properly in the struct arg. 
+// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]] +// CHECK: store i1 [[condition]], ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 1 +// CHECK: store i32 [[arg0]], ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 2 +// CHECK: store float [[arg1]], ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 3 +// CHECK: store ptr [[arg2]], ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 4 +// CHECK: store fp128 [[arg3]], ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 5 +// CHECK: store ptr [[allocated]], ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 6 +// CHECK: store i32 [[loaded]], ptr [[loadedPtr]] + +// Runtime call. +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:@.+]], ptr [[structArg]]) +// CHECK: br label +// CHECK: call void @bar() +// CHECK: ret void +llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %arg2: !llvm.ptr, %arg3: f128) { + %allocated = llvm.call @my_alloca_fn(): () -> !llvm.ptr + %loaded = llvm.load %allocated : !llvm.ptr + llvm.br ^codegenBlock +^codegenBlock: + omp.teams { + llvm.cond_br %condition, ^true_block, ^false_block + ^true_block: + llvm.call @foo(%arg0, %arg1, %arg2, %arg3, %allocated, %loaded) : (i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () + llvm.br ^exit + ^false_block: + llvm.br ^exit + ^exit: + omp.terminator + } + llvm.call @bar() : () -> () + llvm.return +} + +// Check the outlined function. +// CHECK: define internal void [[outlinedfn:@.+]](ptr [[data:%.+]]) +// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[data]] +// CHECK: [[condition:%.+]] = load i1, ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 1 +// CHECK: [[arg0:%.+]] = load i32, ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 2 +// CHECK: [[arg1:%.+]] = load float, ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 3 +// CHECK: [[arg2:%.+]] = load ptr, ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 4 +// CHECK: [[arg3:%.+]] = load fp128, ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 5 +// CHECK: [[allocated:%.+]] = load ptr, ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 6 +// CHECK: [[loaded:%.+]] = load i32, ptr [[loadedPtr]] +// CHECK: br label + +// CHECK: br i1 [[condition]], label %[[true:.+]], label %[[false:.+]] +// CHECK: [[false]]: +// CHECK-NEXT: br label +// CHECK: [[true]]: +// CHECK: call void @foo(i32 [[arg0]], float [[arg1]], ptr [[arg2]], fp128 [[arg3]], ptr [[allocated]], i32 [[loaded]]) +// CHECK-NEXT: br label +// CHECK: ret void + +// Check the wrapper function +// CHECK: define void [[wrapperfn]](ptr [[globalTID:%.+]], ptr [[boundTID:%.+]], ptr [[data:%.+]]) +// CHECK: call void [[outlinedfn]](ptr [[data]]) +// CHECK: ret void >From 4618cca227680ed3487961560f9419050890db55 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 20:48:45 -0500 Subject: [PATCH 2/2] Formatting --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 
deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b9643be40e13c01..9eefd1be8ad070e 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -662,20 +662,27 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, } // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder -static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { +static LogicalResult +convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; LogicalResult bodyGenStatus = success(); - if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + if (op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || + op.getThreadLimit() || !op.getAllocatorsVars().empty() || + op.getReductions()) { return op.emitError("unhandled clauses for translation to LLVM IR"); } - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ - LLVM::ModuleTranslation::SaveStack frame(moduleTranslation, allocaIP); + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocaIP); builder.restoreIP(codegenIP); - convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, + moduleTranslation, bodyGenStatus); }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + builder.restoreIP( + moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); return bodyGenStatus; } From lldb-commits at lists.llvm.org Tue Oct 3 11:08:12 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:08:12 -0700 (PDT) Subject: [Lldb-commits] [lldb] Fix std::variant test failure on certain buildbot (PR #68139) Message-ID: https://github.com/jeffreytan81 created https://github.com/llvm/llvm-project/pull/68139 https://github.com/llvm/llvm-project/pull/68012 works on my CentOS Linux and MacBook but seems to fail for certain build bots. The error log reports a "No Value" check failure for `std::variant`, but it is not very actionable without a reproducer. To unblock the build bots, I am commenting out the "No Value" checks.
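For context on what the disabled assertions exercise: "No Value" is how the formatter renders a variant that is valueless_by_exception. A standalone illustration of how a variant typically reaches that state — background only, not part of the patch:

```cpp
#include <cstdio>
#include <variant>

// The conversion to int throws, so the emplace below destroys the old
// alternative and then fails to construct the new one.
struct ThrowsOnConversion {
  operator int() const { throw 42; }
};

int main() {
  std::variant<int, float> v = 3.14f;
  try {
    v.emplace<0>(ThrowsOnConversion{}); // throws mid-construction
  } catch (...) {
  }
  // With libstdc++ the variant is now valueless; lldb's formatter
  // displays such a variant as "No Value".
  std::printf("valueless: %d\n", v.valueless_by_exception());
  return 0;
}
```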
>From 115c0f1dd53a06eac4bad9d651836d698b8cad69 Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Tue, 3 Oct 2023 11:03:59 -0700 Subject: [PATCH] Fix std::variant test failure on certain buildbot --- .../libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py index 7a433fea5feca23..96a9c8d30c45b00 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -60,6 +60,9 @@ def test_with_run_command(self): "frame variable v3", substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], ) + """ + TODO: temporarily disable No Value tests as they seem to fail on ubuntu/debian + bots. Pending reproduce and investigation. self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) @@ -67,3 +70,4 @@ def test_with_run_command(self): "frame variable v_many_types_no_value", substrs=["v_many_types_no_value = No Value"], ) + """ From lldb-commits at lists.llvm.org Tue Oct 3 11:08:20 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:08:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] Fix std::variant test failure on certain buildbot (PR #68139) In-Reply-To: Message-ID: <651c5894.170a0220.c60c4.5130@mx.google.com> https://github.com/jeffreytan81 ready_for_review https://github.com/llvm/llvm-project/pull/68139 From lldb-commits at lists.llvm.org Tue Oct 3 11:09:26 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:09:26 -0700 (PDT) Subject: [Lldb-commits] [lldb] Fix std::variant test failure on certain buildbot (PR #68139) In-Reply-To: Message-ID: <651c58d6.620a0220.46800.47a6@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes https://github.com/llvm/llvm-project/pull/68012 works on my CentOS Linux and MacBook but seems to fail for certain build bots. The error log reports a "No Value" check failure for `std::variant`, but it is not very actionable without a reproducer. To unblock the build bots, I am commenting out the "No Value" checks. --- Full diff: https://github.com/llvm/llvm-project/pull/68139.diff 1 Files Affected: - (modified) lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py (+4) ``````````diff diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py index 7a433fea5feca23..96a9c8d30c45b00 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -60,6 +60,9 @@ def test_with_run_command(self): "frame variable v3", substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], ) + """ + TODO: temporarily disable No Value tests as they seem to fail on ubuntu/debian + bots. Pending reproduce and investigation. self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) @@ -67,3 +70,4 @@ def test_with_run_command(self): "frame variable v_many_types_no_value", substrs=["v_many_types_no_value = No Value"], ) + """ ``````````
https://github.com/llvm/llvm-project/pull/68139 From lldb-commits at lists.llvm.org Tue Oct 3 11:14:12 2023 From: lldb-commits at lists.llvm.org (Stanislav Mekhanoshin via lldb-commits) Date: Tue, 03 Oct 2023 11:14:12 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651c59f4.170a0220.3772c.532e@mx.google.com> rampitec wrote: > > I've just tested this on 10000 graphics shaders and it seems to make no difference at all. I tried gfx900 and gfx1100. Can anyone else from the graphics team confirm this? > > I can confirm no difference on gfx1102 gfx11 is the same as gfx10, it just bails because of the VOP3 literal support. This is strange for gfx9. Do these shaders use -O2 or -O3? https://github.com/llvm/llvm-project/pull/67878 From lldb-commits at lists.llvm.org Tue Oct 3 11:16:38 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:16:38 -0700 (PDT) Subject: [Lldb-commits] [lldb] b3cc480 - [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (#67106) Message-ID: <651c5a86.630a0220.5f3d4.446a@mx.google.com> Author: aokblast Date: 2023-10-03T14:16:32-04:00 New Revision: b3cc4804d45d6b612ac9b3cc47ebbb0da44ebc60 URL: https://github.com/llvm/llvm-project/commit/b3cc4804d45d6b612ac9b3cc47ebbb0da44ebc60 DIFF: https://github.com/llvm/llvm-project/commit/b3cc4804d45d6b612ac9b3cc47ebbb0da44ebc60.diff LOG: [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (#67106) The implementation supports parsing kernel modules for the FreeBSD kernel and has been tested on x86-64 and arm64. In summary, this class parses the linked list that resides in kernel memory and records all loaded kernel modules, then loads the corresponding debug symbol files to facilitate the debugging process. Added: lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h Modified: lldb/source/Plugins/DynamicLoader/CMakeLists.txt lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/DynamicLoader/CMakeLists.txt b/lldb/source/Plugins/DynamicLoader/CMakeLists.txt index f357fea02efbe68..30607159acdc088 100644 --- a/lldb/source/Plugins/DynamicLoader/CMakeLists.txt +++ b/lldb/source/Plugins/DynamicLoader/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Darwin-Kernel) +add_subdirectory(FreeBSD-Kernel) add_subdirectory(MacOSX-DYLD) add_subdirectory(POSIX-DYLD) add_subdirectory(Static) diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt new file mode 100644 index 000000000000000..76daf0a327cf97b --- /dev/null +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/CMakeLists.txt @@ -0,0 +1,13 @@ +add_lldb_library(lldbPluginDynamicLoaderFreeBSDKernel PLUGIN + DynamicLoaderFreeBSDKernel.cpp + + LINK_LIBS + lldbBreakpoint + lldbCore + lldbHost + lldbInterpreter + lldbSymbol + lldbTarget + lldbUtility + lldbPluginObjectFileELF + ) diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp new file mode 100644 index 000000000000000..bbb83ff0a118400 --- /dev/null +++
b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp @@ -0,0 +1,789 @@ +//===-- DynamicLoaderFreeBSDKernel.cpp +//------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Breakpoint/StoppointCallbackContext.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Host/StreamFile.h" +#include "lldb/Interpreter/OptionValueProperties.h" +#include "lldb/Symbol/LocateSymbolFile.h" +#include "lldb/Symbol/ObjectFile.h" +#include "lldb/Target/OperatingSystem.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/StackFrame.h" +#include "lldb/Target/Target.h" +#include "lldb/Target/Thread.h" +#include "lldb/Target/ThreadPlanRunToAddress.h" +#include "lldb/Utility/DataBuffer.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/State.h" + +#include "Plugins/ObjectFile/ELF/ObjectFileELF.h" + +#include "DynamicLoaderFreeBSDKernel.h" +#include +#include + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(DynamicLoaderFreeBSDKernel) + +void DynamicLoaderFreeBSDKernel::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + DebuggerInit); +} + +void DynamicLoaderFreeBSDKernel::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +llvm::StringRef DynamicLoaderFreeBSDKernel::GetPluginDescriptionStatic() { + return "The Dynamic Loader Plugin For FreeBSD Kernel"; +} + +static bool is_kernel(Module *module) { + if (!module) + return false; + + ObjectFile *objfile = module->GetObjectFile(); + if (!objfile) + return false; + if (objfile->GetType() != ObjectFile::eTypeExecutable) + return false; + if (objfile->GetStrata() != ObjectFile::eStrataUnknown && + objfile->GetStrata() != ObjectFile::eStrataKernel) + return false; + + return true; +} + +static bool is_kmod(Module *module) { + if (!module) + return false; + if (!module->GetObjectFile()) + return false; + ObjectFile *objfile = module->GetObjectFile(); + if (objfile->GetType() != ObjectFile::eTypeObjectFile && + objfile->GetType() != ObjectFile::eTypeSharedLibrary) + return false; + + return true; +} + +static bool is_reloc(Module *module) { + if (!module) + return false; + if (!module->GetObjectFile()) + return false; + ObjectFile *objfile = module->GetObjectFile(); + if (objfile->GetType() != ObjectFile::eTypeObjectFile) + return false; + + return true; +} + +// Instantiate Function of the FreeBSD Kernel Dynamic Loader Plugin called when +// Register the Plugin +DynamicLoader * +DynamicLoaderFreeBSDKernel::CreateInstance(lldb_private::Process *process, + bool force) { + // Check the environment when the plugin is not force loaded + Module *exec = process->GetTarget().GetExecutableModulePointer(); + if (exec && !is_kernel(exec)) { + return nullptr; + } + if (!force) { + // Check if the target is kernel + const llvm::Triple &triple_ref = + process->GetTarget().GetArchitecture().GetTriple(); + if (!triple_ref.isOSFreeBSD()) { + return nullptr; + } + } + + // At this point we have checked the target is a 
FreeBSD kernel and all we + // have to do is to find the kernel address + const addr_t kernel_address = FindFreeBSDKernel(process); + + if (CheckForKernelImageAtAddress(process, kernel_address).IsValid()) + return new DynamicLoaderFreeBSDKernel(process, kernel_address); + + return nullptr; +} + +addr_t +DynamicLoaderFreeBSDKernel::FindFreeBSDKernel(lldb_private::Process *process) { + addr_t kernel_addr = process->GetImageInfoAddress(); + if (kernel_addr == LLDB_INVALID_ADDRESS) + kernel_addr = FindKernelAtLoadAddress(process); + return kernel_addr; +} + +// Get the kernel address if the kernel is not loaded with a slide +addr_t DynamicLoaderFreeBSDKernel::FindKernelAtLoadAddress( + lldb_private::Process *process) { + Module *exe_module = process->GetTarget().GetExecutableModulePointer(); + + if (!is_kernel(exe_module)) + return LLDB_INVALID_ADDRESS; + + ObjectFile *exe_objfile = exe_module->GetObjectFile(); + + if (!exe_objfile->GetBaseAddress().IsValid()) + return LLDB_INVALID_ADDRESS; + + if (CheckForKernelImageAtAddress( + process, exe_objfile->GetBaseAddress().GetFileAddress()) + .IsValid()) + return exe_objfile->GetBaseAddress().GetFileAddress(); + + return LLDB_INVALID_ADDRESS; +} + +// Read ELF header from memry and return +bool DynamicLoaderFreeBSDKernel::ReadELFHeader(Process *process, + lldb::addr_t addr, + llvm::ELF::Elf32_Ehdr &header, + bool *read_error) { + Status error; + if (read_error) + *read_error = false; + + if (process->ReadMemory(addr, &header, sizeof(header), error) != + sizeof(header)) { + if (read_error) + *read_error = true; + return false; + } + + if (!header.checkMagic()) + return false; + + return true; +} + +// Check the correctness of Kernel and return UUID +lldb_private::UUID DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress( + Process *process, lldb::addr_t addr, bool *read_error) { + Log *log = GetLog(LLDBLog::DynamicLoader); + + if (addr == LLDB_INVALID_ADDRESS) { + if (read_error) + *read_error = true; + return UUID(); + } + + LLDB_LOGF(log, + "DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress: " + "looking for kernel binary at 0x%" PRIx64, + addr); + + llvm::ELF::Elf32_Ehdr header; + if (!ReadELFHeader(process, addr, header)) { + *read_error = true; + return UUID(); + } + + // Check header type + if (header.e_type != llvm::ELF::ET_EXEC) + return UUID(); + + ModuleSP memory_module_sp = + process->ReadModuleFromMemory(FileSpec("temp_freebsd_kernel"), addr); + + if (!memory_module_sp.get()) { + *read_error = true; + return UUID(); + } + + ObjectFile *exe_objfile = memory_module_sp->GetObjectFile(); + if (exe_objfile == nullptr) { + LLDB_LOGF(log, + "DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress " + "found a binary at 0x%" PRIx64 + " but could not create an object file from memory", + addr); + return UUID(); + } + + // In here, I should check is_kernel for memory_module_sp + // However, the ReadModuleFromMemory reads wrong section so that this check + // will failed + ArchSpec kernel_arch(llvm::ELF::convertEMachineToArchName(header.e_machine)); + + if (!process->GetTarget().GetArchitecture().IsCompatibleMatch(kernel_arch)) + process->GetTarget().SetArchitecture(kernel_arch); + + std::string uuid_str; + if (memory_module_sp->GetUUID().IsValid()) { + uuid_str = "with UUID "; + uuid_str += memory_module_sp->GetUUID().GetAsString(); + } else { + uuid_str = "and no LC_UUID found in load commands "; + } + LLDB_LOGF(log, + "DynamicLoaderFreeBSDKernel::CheckForKernelImageAtAddress: " + "kernel binary image found at 0x%" PRIx64 " with arch 
'%s' %s", + addr, kernel_arch.GetTriple().str().c_str(), uuid_str.c_str()); + + return memory_module_sp->GetUUID(); +} + +void DynamicLoaderFreeBSDKernel::DebuggerInit( + lldb_private::Debugger &debugger) {} + +DynamicLoaderFreeBSDKernel::DynamicLoaderFreeBSDKernel(Process *process, + addr_t kernel_address) + : DynamicLoader(process), m_process(process), + m_linker_file_list_struct_addr(LLDB_INVALID_ADDRESS), + m_linker_file_head_addr(LLDB_INVALID_ADDRESS), + m_kernel_load_address(kernel_address), m_mutex() { + process->SetCanRunCode(false); +} + +DynamicLoaderFreeBSDKernel::~DynamicLoaderFreeBSDKernel() { Clear(true); } + +void DynamicLoaderFreeBSDKernel::Update() { + LoadKernelModules(); + SetNotificationBreakPoint(); +} + +// Create in memory Module at the load address +bool DynamicLoaderFreeBSDKernel::KModImageInfo::ReadMemoryModule( + lldb_private::Process *process) { + Log *log = GetLog(LLDBLog::DynamicLoader); + if (m_memory_module_sp) + return true; + if (m_load_address == LLDB_INVALID_ADDRESS) + return false; + + FileSpec file_spec(m_name); + + ModuleSP memory_module_sp; + + llvm::ELF::Elf32_Ehdr elf_eheader; + size_t size_to_read = 512; + + if (ReadELFHeader(process, m_load_address, elf_eheader)) { + if (elf_eheader.e_ident[llvm::ELF::EI_CLASS] == llvm::ELF::ELFCLASS32) { + size_to_read = sizeof(llvm::ELF::Elf32_Ehdr) + + elf_eheader.e_phnum * elf_eheader.e_phentsize; + } else if (elf_eheader.e_ident[llvm::ELF::EI_CLASS] == + llvm::ELF::ELFCLASS64) { + llvm::ELF::Elf64_Ehdr elf_eheader; + Status error; + if (process->ReadMemory(m_load_address, &elf_eheader, sizeof(elf_eheader), + error) == sizeof(elf_eheader)) + size_to_read = sizeof(llvm::ELF::Elf64_Ehdr) + + elf_eheader.e_phnum * elf_eheader.e_phentsize; + } + } + + memory_module_sp = + process->ReadModuleFromMemory(file_spec, m_load_address, size_to_read); + + if (!memory_module_sp) + return false; + + bool this_is_kernel = is_kernel(memory_module_sp.get()); + + if (!m_uuid.IsValid() && memory_module_sp->GetUUID().IsValid()) + m_uuid = memory_module_sp->GetUUID(); + + m_memory_module_sp = memory_module_sp; + m_is_kernel = this_is_kernel; + + // The kernel binary is from memory + if (this_is_kernel) { + LLDB_LOGF(log, "KextImageInfo::ReadMemoryModule read the kernel binary out " + "of memory"); + + if (memory_module_sp->GetArchitecture().IsValid()) + process->GetTarget().SetArchitecture(memory_module_sp->GetArchitecture()); + } + + return true; +} + +bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule( + lldb_private::Process *process) { + Log *log = GetLog(LLDBLog::DynamicLoader); + + if (IsLoaded()) + return true; + + Target &target = process->GetTarget(); + + if (IsKernel() && m_uuid.IsValid()) { + Stream &s = target.GetDebugger().GetOutputStream(); + s.Printf("Kernel UUID: %s\n", m_uuid.GetAsString().c_str()); + s.Printf("Load Address: 0x%" PRIx64 "\n", m_load_address); + } + + // Test if the module is loaded into the taget, + // maybe the module is loaded manually by user by doing target module add + // So that we have to create the module manually + if (!m_module_sp) { + const ModuleList &target_images = target.GetImages(); + m_module_sp = target_images.FindModule(m_uuid); + + // Search in the file system + if (!m_module_sp) { + ModuleSpec module_spec(FileSpec(GetPath()), target.GetArchitecture()); + if (IsKernel()) { + Status error; + if (Symbols::DownloadObjectAndSymbolFile(module_spec, error, true)) { + if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) + m_module_sp = 
std::make_shared(module_spec.GetFileSpec(), + target.GetArchitecture()); + } + } + + if (!m_module_sp) + m_module_sp = target.GetOrCreateModule(module_spec, true); + if (IsKernel() && !m_module_sp) { + Stream &s = target.GetDebugger().GetOutputStream(); + s.Printf("WARNING: Unable to locate kernel binary on the debugger " + "system.\n"); + } + } + + if (m_module_sp) { + // If the file is not kernel or kmod, the target should be loaded once and + // don't reload again + if (!IsKernel() && !is_kmod(m_module_sp.get())) { + ModuleSP existing_module_sp = target.GetImages().FindModule(m_uuid); + if (existing_module_sp && + existing_module_sp->IsLoadedInTarget(&target)) { + LLDB_LOGF(log, + "'%s' with UUID %s is not a kmod or kernel, and is " + "already registered in target, not loading.", + m_name.c_str(), m_uuid.GetAsString().c_str()); + return true; + } + } + m_uuid = m_module_sp->GetUUID(); + + // or append to the images + target.GetImages().AppendIfNeeded(m_module_sp, false); + } + } + + // If this file is relocatable kernel module(x86_64), adjust it's + // section(PT_LOAD segment) and return Because the kernel module's load + // address is the text section. lldb cannot create full memory module upon + // relocatable file So what we do is to set the load address only. + if (is_kmod(m_module_sp.get()) && is_reloc(m_module_sp.get())) { + m_stop_id = process->GetStopID(); + bool changed = false; + m_module_sp->SetLoadAddress(target, m_load_address, true, changed); + return true; + } + + if (m_module_sp) + ReadMemoryModule(process); + + // Calculate the slides of in memory module + if (!m_memory_module_sp || !m_module_sp) { + m_module_sp.reset(); + return false; + } + + ObjectFile *ondisk_object_file = m_module_sp->GetObjectFile(); + ObjectFile *memory_object_file = m_memory_module_sp->GetObjectFile(); + + if (!ondisk_object_file || !memory_object_file) + m_module_sp.reset(); + + // Find the slide address + addr_t fixed_slide = LLDB_INVALID_ADDRESS; + if (ObjectFileELF *memory_objfile_elf = + llvm::dyn_cast(memory_object_file)) { + addr_t load_address = memory_object_file->GetBaseAddress().GetFileAddress(); + + if (load_address != LLDB_INVALID_ADDRESS && + m_load_address != load_address) { + fixed_slide = m_load_address - load_address; + LLDB_LOGF(log, + "kmod %s in-memory LOAD vmaddr is not correct, using a " + "fixed slide of 0x%" PRIx64, + m_name.c_str(), fixed_slide); + } + } + + SectionList *ondisk_section_list = ondisk_object_file->GetSectionList(); + SectionList *memory_section_list = memory_object_file->GetSectionList(); + + if (memory_section_list && ondisk_object_file) { + const uint32_t num_ondisk_sections = ondisk_section_list->GetSize(); + uint32_t num_load_sections = 0; + + for (uint32_t section_idx = 0; section_idx < num_ondisk_sections; + ++section_idx) { + SectionSP on_disk_section_sp = + ondisk_section_list->GetSectionAtIndex(section_idx); + + if (!on_disk_section_sp) + continue; + if (fixed_slide != LLDB_INVALID_ADDRESS) { + target.SetSectionLoadAddress(on_disk_section_sp, + on_disk_section_sp->GetFileAddress() + + fixed_slide); + + } else { + const Section *memory_section = + memory_section_list + ->FindSectionByName(on_disk_section_sp->GetName()) + .get(); + if (memory_section) { + target.SetSectionLoadAddress(on_disk_section_sp, + memory_section->GetFileAddress()); + ++num_load_sections; + } + } + } + + if (num_load_sections) + m_stop_id = process->GetStopID(); + else + m_module_sp.reset(); + } else { + m_module_sp.reset(); + } + + if (IsLoaded() && m_module_sp && 
IsKernel()) { + Stream &s = target.GetDebugger().GetOutputStream(); + ObjectFile *kernel_object_file = m_module_sp->GetObjectFile(); + if (kernel_object_file) { + addr_t file_address = + kernel_object_file->GetBaseAddress().GetFileAddress(); + if (m_load_address != LLDB_INVALID_ADDRESS && + file_address != LLDB_INVALID_ADDRESS) { + s.Printf("Kernel slide 0x%" PRIx64 " in memory.\n", + m_load_address - file_address); + s.Printf("Loaded kernel file %s\n", + m_module_sp->GetFileSpec().GetPath().c_str()); + } + } + s.Flush(); + } + + return IsLoaded(); +} + +// This function only works for the kernel file; for other files it will +// reset the load address and return false +bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingFileAddress( + lldb_private::Process *process) { + if (IsLoaded()) + return true; + + if (m_module_sp) { + bool changed = false; + if (m_module_sp->SetLoadAddress(process->GetTarget(), 0, true, changed)) + m_stop_id = process->GetStopID(); + } + + return false; +} + +// Get the head of the linker file list +bool DynamicLoaderFreeBSDKernel::ReadKmodsListHeader() { + std::lock_guard<std::recursive_mutex> guard(m_mutex); + + if (m_linker_file_list_struct_addr.IsValid()) { + // Get tqh_first struct element from linker_files + Status error; + addr_t address = m_process->ReadPointerFromMemory( + m_linker_file_list_struct_addr.GetLoadAddress(&m_process->GetTarget()), + error); + if (address != LLDB_INVALID_ADDRESS && error.Success()) { + m_linker_file_head_addr = Address(address); + } else { + m_linker_file_list_struct_addr.Clear(); + return false; + } + + if (!m_linker_file_head_addr.IsValid() || + m_linker_file_head_addr.GetFileAddress() == 0) { + m_linker_file_list_struct_addr.Clear(); + return false; + } + } + return true; +} + +// Parse kmod info from the linker file list +bool DynamicLoaderFreeBSDKernel::ParseKmods(Address linker_files_head_addr) { + std::lock_guard<std::recursive_mutex> guard(m_mutex); + KModImageInfo::collection_type linker_files_list; + Log *log = GetLog(LLDBLog::DynamicLoader); + + if (!ReadAllKmods(linker_files_head_addr, linker_files_list)) + return false; + LLDB_LOGF( + log, + "Kmod-changed breakpoint hit, there are %lu kernel modules currently.\n", + linker_files_list.size()); + + ModuleList &modules = m_process->GetTarget().GetImages(); + ModuleList remove_modules; + ModuleList add_modules; + + for (ModuleSP module : modules.Modules()) { + if (is_kernel(module.get())) + continue; + if (is_kmod(module.get())) + remove_modules.AppendIfNeeded(module); + } + + m_process->GetTarget().ModulesDidUnload(remove_modules, false); + + for (KModImageInfo &image_info : linker_files_list) { + if (m_kld_name_to_uuid.find(image_info.GetName()) != + m_kld_name_to_uuid.end()) + image_info.SetUUID(m_kld_name_to_uuid[image_info.GetName()]); + bool failed_to_load = false; + if (!image_info.LoadImageUsingMemoryModule(m_process)) { + image_info.LoadImageUsingFileAddress(m_process); + failed_to_load = true; + } else { + m_linker_files_list.push_back(image_info); + m_kld_name_to_uuid[image_info.GetName()] = image_info.GetUUID(); + } + + if (!failed_to_load) + add_modules.AppendIfNeeded(image_info.GetModule()); + } + m_process->GetTarget().ModulesDidLoad(add_modules); + return true; +} + +// Read all kmods starting from the given list head +bool DynamicLoaderFreeBSDKernel::ReadAllKmods( + Address linker_files_head_addr, + KModImageInfo::collection_type &kmods_list) { + + // Get offset of next member and load address symbol + static ConstString kld_off_address_symbol_name("kld_off_address"); + static ConstString kld_off_next_symbol_name("kld_off_next"); + 
static ConstString kld_off_filename_symbol_name("kld_off_filename"); + static ConstString kld_off_pathname_symbol_name("kld_off_pathname"); + const Symbol *kld_off_address_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_address_symbol_name, eSymbolTypeData); + const Symbol *kld_off_next_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_next_symbol_name, eSymbolTypeData); + const Symbol *kld_off_filename_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_filename_symbol_name, eSymbolTypeData); + const Symbol *kld_off_pathname_symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + kld_off_pathname_symbol_name, eSymbolTypeData); + + if (!kld_off_address_symbol || !kld_off_next_symbol || + !kld_off_filename_symbol || !kld_off_pathname_symbol) + return false; + + Status error; + const int32_t kld_off_address = m_process->ReadSignedIntegerFromMemory( + kld_off_address_symbol->GetAddress().GetLoadAddress( + &m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + const int32_t kld_off_next = m_process->ReadSignedIntegerFromMemory( + kld_off_next_symbol->GetAddress().GetLoadAddress(&m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + const int32_t kld_off_filename = m_process->ReadSignedIntegerFromMemory( + kld_off_filename_symbol->GetAddress().GetLoadAddress( + &m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + + const int32_t kld_off_pathname = m_process->ReadSignedIntegerFromMemory( + kld_off_pathname_symbol->GetAddress().GetLoadAddress( + &m_process->GetTarget()), + 4, 0, error); + if (error.Fail()) + return false; + + // Parse KMods + addr_t kld_load_addr(LLDB_INVALID_ADDRESS); + char kld_filename[255]; + char kld_pathname[255]; + addr_t current_kld = + linker_files_head_addr.GetLoadAddress(&m_process->GetTarget()); + + while (current_kld != 0) { + addr_t kld_filename_addr = + m_process->ReadPointerFromMemory(current_kld + kld_off_filename, error); + if (error.Fail()) + return false; + addr_t kld_pathname_addr = + m_process->ReadPointerFromMemory(current_kld + kld_off_pathname, error); + if (error.Fail()) + return false; + + m_process->ReadCStringFromMemory(kld_filename_addr, kld_filename, + sizeof(kld_filename), error); + if (error.Fail()) + return false; + m_process->ReadCStringFromMemory(kld_pathname_addr, kld_pathname, + sizeof(kld_pathname), error); + if (error.Fail()) + return false; + kld_load_addr = + m_process->ReadPointerFromMemory(current_kld + kld_off_address, error); + if (error.Fail()) + return false; + + kmods_list.emplace_back(); + KModImageInfo &kmod_info = kmods_list.back(); + kmod_info.SetName(kld_filename); + kmod_info.SetLoadAddress(kld_load_addr); + kmod_info.SetPath(kld_pathname); + + current_kld = + m_process->ReadPointerFromMemory(current_kld + kld_off_next, error); + if (kmod_info.GetName() == "kernel") + kmods_list.pop_back(); + if (error.Fail()) + return false; + } + + return true; +} + +// Read all kmods +void DynamicLoaderFreeBSDKernel::ReadAllKmods() { + std::lock_guard<std::recursive_mutex> guard(m_mutex); + + if (ReadKmodsListHeader()) { + if (m_linker_file_head_addr.IsValid()) { + if (!ParseKmods(m_linker_file_head_addr)) + m_linker_files_list.clear(); + } + } +} + +// Load all Kernel Modules +void DynamicLoaderFreeBSDKernel::LoadKernelModules() { + Log *log = GetLog(LLDBLog::DynamicLoader); + LLDB_LOGF(log, "DynamicLoaderFreeBSDKernel::LoadKernelModules " + "Start loading 
Kernel Module"); + + // Initialize Kernel Image Information at the first time + if (m_kernel_image_info.GetLoadAddress() == LLDB_INVALID_ADDRESS) { + ModuleSP module_sp = m_process->GetTarget().GetExecutableModule(); + if (is_kernel(module_sp.get())) { + m_kernel_image_info.SetModule(module_sp); + m_kernel_image_info.SetIsKernel(true); + } + + // Set name for kernel + llvm::StringRef kernel_name("freebsd_kernel"); + module_sp = m_kernel_image_info.GetModule(); + if (module_sp.get() && module_sp->GetObjectFile() && + !module_sp->GetObjectFile()->GetFileSpec().GetFilename().IsEmpty()) + kernel_name = module_sp->GetObjectFile() + ->GetFileSpec() + .GetFilename() + .GetStringRef(); + m_kernel_image_info.SetName(kernel_name.data()); + + if (m_kernel_image_info.GetLoadAddress() == LLDB_INVALID_ADDRESS) { + m_kernel_image_info.SetLoadAddress(m_kernel_load_address); + } + + // Build In memory Module + if (m_kernel_image_info.GetLoadAddress() != LLDB_INVALID_ADDRESS) { + // If the kernel is not loaded in the memory, use file to load + if (!m_kernel_image_info.LoadImageUsingMemoryModule(m_process)) + m_kernel_image_info.LoadImageUsingFileAddress(m_process); + } + } + + LoadOperatingSystemPlugin(false); + + if (!m_kernel_image_info.IsLoaded() || !m_kernel_image_info.GetModule()) { + m_kernel_image_info.Clear(); + return; + } + + static ConstString modlist_symbol_name("linker_files"); + + const Symbol *symbol = + m_kernel_image_info.GetModule()->FindFirstSymbolWithNameAndType( + modlist_symbol_name, lldb::eSymbolTypeData); + + if (symbol) { + m_linker_file_list_struct_addr = symbol->GetAddress(); + ReadAllKmods(); + } else { + LLDB_LOGF(log, "DynamicLoaderFreeBSDKernel::LoadKernelModules " + "cannot file modlist symbol"); + } +} + +// Update symbol when use kldload by setting callback function on kldload +void DynamicLoaderFreeBSDKernel::SetNotificationBreakPoint() {} + +// Hook called when attach to a process +void DynamicLoaderFreeBSDKernel::DidAttach() { + PrivateInitialize(m_process); + Update(); +} + +// Hook called after attach to a process +void DynamicLoaderFreeBSDKernel::DidLaunch() { + PrivateInitialize(m_process); + Update(); +} + +// Clear all member except kernel address +void DynamicLoaderFreeBSDKernel::Clear(bool clear_process) { + std::lock_guard guard(m_mutex); + if (clear_process) + m_process = nullptr; + m_linker_file_head_addr.Clear(); + m_linker_file_list_struct_addr.Clear(); + m_kernel_image_info.Clear(); + m_linker_files_list.clear(); +} + +// Reinitialize class +void DynamicLoaderFreeBSDKernel::PrivateInitialize(Process *process) { + Clear(true); + m_process = process; +} + +ThreadPlanSP DynamicLoaderFreeBSDKernel::GetStepThroughTrampolinePlan( + lldb_private::Thread &thread, bool stop_others) { + Log *log = GetLog(LLDBLog::Step); + LLDB_LOGF(log, "DynamicLoaderFreeBSDKernel::GetStepThroughTrampolinePlan is " + "not yet implemented."); + return {}; +} + +Status DynamicLoaderFreeBSDKernel::CanLoadImage() { + Status error("shared object cannot be loaded into kernel"); + return error; +} diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h new file mode 100644 index 000000000000000..d8656e9c49dfe25 --- /dev/null +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h @@ -0,0 +1,171 @@ +//===-- DynamicLoaderFreeBSDKernel.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_DYNAMICLOADER_FREEBSD_KERNEL_DYNAMICLOADERFREEBSDKERNEL_H +#define LLDB_SOURCE_PLUGINS_DYNAMICLOADER_FREEBSD_KERNEL_DYNAMICLOADERFREEBSDKERNEL_H + +#include +#include +#include + +#include "lldb/Target/DynamicLoader.h" +#include "lldb/Target/Process.h" +#include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/UUID.h" +#include "llvm/BinaryFormat/ELF.h" + +class DynamicLoaderFreeBSDKernel : public lldb_private::DynamicLoader { +public: + DynamicLoaderFreeBSDKernel(lldb_private::Process *process, + lldb::addr_t kernel_addr); + + ~DynamicLoaderFreeBSDKernel() override; + + // Static Functions + + static void Initialize(); + + static void Terminate(); + + static llvm::StringRef GetPluginNameStatic() { return "freebsd-kernel"; } + + static llvm::StringRef GetPluginDescriptionStatic(); + + static lldb_private::DynamicLoader * + CreateInstance(lldb_private::Process *process, bool force); + + static void DebuggerInit(lldb_private::Debugger &debugger); + + static lldb::addr_t FindFreeBSDKernel(lldb_private::Process *process); + + // Hooks called after attaching to a process + void DidAttach() override; + + void DidLaunch() override; + + lldb::ThreadPlanSP GetStepThroughTrampolinePlan(lldb_private::Thread &thread, + bool stop_others) override; + + lldb_private::Status CanLoadImage() override; + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + +protected: + class KModImageInfo { + public: + KModImageInfo() + : m_module_sp(), m_memory_module_sp(), m_uuid(), m_name(), m_path() {} + + void Clear() { + m_load_address = LLDB_INVALID_ADDRESS; + m_name.clear(); + m_uuid.Clear(); + m_module_sp.reset(); + m_memory_module_sp.reset(); + m_stop_id = UINT32_MAX; + m_path.clear(); + } + + void SetLoadAddress(lldb::addr_t load_address) { + m_load_address = load_address; + } + + lldb::addr_t GetLoadAddress() const { return m_load_address; } + + void SetUUID(const lldb_private::UUID uuid) { m_uuid = uuid; } + + lldb_private::UUID GetUUID() const { return m_uuid; } + + void SetName(const char *name) { m_name = name; } + + std::string GetName() const { return m_name; } + + void SetPath(const char *path) { m_path = path; } + + std::string GetPath() const { return m_path; } + + void SetModule(lldb::ModuleSP module) { m_module_sp = module; } + + lldb::ModuleSP GetModule() { return m_module_sp; } + + void SetIsKernel(bool is_kernel) { m_is_kernel = is_kernel; } + + bool IsKernel() const { return m_is_kernel; }; + + void SetStopID(uint32_t stop_id) { m_stop_id = stop_id; } + + uint32_t GetStopID() { return m_stop_id; } + + bool IsLoaded() const { return m_stop_id != UINT32_MAX; }; + + bool ReadMemoryModule(lldb_private::Process *process); + + bool LoadImageUsingMemoryModule(lldb_private::Process *process); + + bool LoadImageUsingFileAddress(lldb_private::Process *process); + + using collection_type = std::vector<KModImageInfo>; + + private: + lldb::ModuleSP m_module_sp; + lldb::ModuleSP m_memory_module_sp; + lldb::addr_t m_load_address = LLDB_INVALID_ADDRESS; + lldb_private::UUID m_uuid; + bool m_is_kernel = false; + std::string m_name; + std::string m_path; + uint32_t m_stop_id = UINT32_MAX; + }; + + void PrivateInitialize(lldb_private::Process *process); + + void Clear(bool clear_process); + + void Update(); + + void LoadKernelModules(); + + 
void ReadAllKmods(); + + bool ReadAllKmods(lldb_private::Address linker_files_head_address, + KModImageInfo::collection_type &kmods_list); + + bool ReadKmodsListHeader(); + + bool ParseKmods(lldb_private::Address linker_files_head_address); + + void SetNotificationBreakPoint(); + + static lldb_private::UUID + CheckForKernelImageAtAddress(lldb_private::Process *process, + lldb::addr_t address, + bool *read_error = nullptr); + + static lldb::addr_t FindKernelAtLoadAddress(lldb_private::Process *process); + + static bool ReadELFHeader(lldb_private::Process *process, + lldb::addr_t address, llvm::ELF::Elf32_Ehdr &header, + bool *read_error = nullptr); + + lldb_private::Process *m_process; + lldb_private::Address m_linker_file_list_struct_addr; + lldb_private::Address m_linker_file_head_addr; + lldb::addr_t m_kernel_load_address; + KModImageInfo m_kernel_image_info; + KModImageInfo::collection_type m_linker_files_list; + std::recursive_mutex m_mutex; + std::unordered_map<std::string, lldb_private::UUID> m_kld_name_to_uuid; + +private: + DynamicLoaderFreeBSDKernel(const DynamicLoaderFreeBSDKernel &) = delete; + + const DynamicLoaderFreeBSDKernel & + operator=(const DynamicLoaderFreeBSDKernel &) = delete; +}; + +#endif diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 2da971dff895b4a..43ab87f08e19251 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -935,6 +935,16 @@ lldb_private::Address ObjectFileELF::GetEntryPointAddress() { } Address ObjectFileELF::GetBaseAddress() { + if (GetType() == ObjectFile::eTypeObjectFile) { + for (SectionHeaderCollIter I = std::next(m_section_headers.begin()); + I != m_section_headers.end(); ++I) { + const ELFSectionHeaderInfo &header = *I; + if (header.sh_flags & SHF_ALLOC) + return Address(GetSectionList()->FindSectionByID(SectionIndex(I)), 0); + } + return LLDB_INVALID_ADDRESS; + } + for (const auto &EnumPHdr : llvm::enumerate(ProgramHeaders())) { const ELFProgramHeader &H = EnumPHdr.value(); if (H.p_type != PT_LOAD) @@ -1764,7 +1774,12 @@ class VMAddressProvider { VMRange GetVMRange(const ELFSectionHeader &H) { addr_t Address = H.sh_addr; addr_t Size = H.sh_flags & SHF_ALLOC ? H.sh_size : 0; - if (ObjectType == ObjectFile::Type::eTypeObjectFile && Segments.empty() && (H.sh_flags & SHF_ALLOC)) { + + // When this is a debug file for a relocatable file, the addresses are all + // zero, so we need to accumulate the addresses instead + if ((ObjectType == ObjectFile::Type::eTypeObjectFile || + (ObjectType == ObjectFile::Type::eTypeDebugInfo && H.sh_addr == 0)) && + Segments.empty() && (H.sh_flags & SHF_ALLOC)) { + NextVMAddress = llvm::alignTo(NextVMAddress, std::max(H.sh_addralign, 1)); + Address = NextVMAddress; @@ -3454,10 +3469,28 @@ ObjectFile::Strata ObjectFileELF::CalculateStrata() { case llvm::ELF::ET_EXEC: // 2 - Executable file - // TODO: is there any way to detect that an executable is a kernel - // related executable by inspecting the program headers, section headers, - // symbols, or any other flag bits??? 
- return eStrataUser; + { + SectionList *section_list = GetSectionList(); + if (section_list) { + static ConstString loader_section_name(".interp"); + SectionSP loader_section = + section_list->FindSectionByName(loader_section_name); + if (loader_section) { + char buffer[256]; + size_t read_size = + ReadSectionData(loader_section.get(), 0, buffer, sizeof(buffer)); + + // We compare the content of the .interp section. + // It contains a trailing \0 that is counted in read_size, so the size + // needs to be decreased by one + llvm::StringRef loader_name(buffer, read_size - 1); + llvm::StringRef freebsd_kernel_loader_name("/red/herring"); + if (loader_name.equals(freebsd_kernel_loader_name)) + return eStrataKernel; + } + } + return eStrataUser; + } case llvm::ELF::ET_DYN: // 3 - Shared object file diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp index e3707365a9c3f19..601f5df43dbba4e 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp +++ b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp @@ -10,7 +10,7 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Target/DynamicLoader.h" -#include "Plugins/DynamicLoader/Static/DynamicLoaderStatic.h" +#include "Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.h" #include "ProcessFreeBSDKernel.h" #include "ThreadFreeBSDKernel.h" @@ -262,7 +262,7 @@ Status ProcessFreeBSDKernel::DoLoadCore() { DynamicLoader *ProcessFreeBSDKernel::GetDynamicLoader() { if (m_dyld_up.get() == nullptr) m_dyld_up.reset(DynamicLoader::FindPlugin( - this, DynamicLoaderStatic::GetPluginNameStatic())); + this, DynamicLoaderFreeBSDKernel::GetPluginNameStatic())); return m_dyld_up.get(); } From lldb-commits at lists.llvm.org Tue Oct 3 11:16:39 2023 From: lldb-commits at lists.llvm.org (Ed Maste via lldb-commits) Date: Tue, 03 Oct 2023 11:16:39 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651c5a87.170a0220.2f3bb.5140@mx.google.com> https://github.com/emaste closed https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Tue Oct 3 11:19:19 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 11:19:19 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651c5b27.170a0220.77c52.5233@mx.google.com> https://github.com/shraiysh updated https://github.com/llvm/llvm-project/pull/68042 >From c7c9e907d897ae667331761d8097ccb7852c5d93 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 16:43:13 -0500 Subject: [PATCH 1/3] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR This patch adds translation from the `omp.teams` operation to LLVM IR using OpenMPIRBuilder. The clauses are not handled in this patch. 
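To make the intended lowering concrete, here is a minimal sketch (the @ident and @teams_wrapper symbols below are illustrative placeholders; the actual names are generated by OpenMPIRBuilder and are matched via regex captures in the tests). MLIR input such as

    llvm.func @foo()
    llvm.func @example() {
      omp.teams {
        llvm.call @foo() : () -> ()
        omp.terminator
      }
      llvm.return
    }

is translated by outlining the teams region into its own function and replacing the construct in the parent with a call into the OpenMP runtime, roughly:

    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @ident, i32 0, ptr @teams_wrapper)

where @teams_wrapper receives the global and bound thread-id pointers and invokes the outlined body. The tests below check exactly this shape.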
--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 +++ mlir/test/Target/LLVMIR/openmp-teams.mlir | 136 ++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/openmp-teams.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8f7f1963b3e5a4f..b9643be40e13c01 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -661,6 +661,24 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, return bodyGenStatus; } +// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder +static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult bodyGenStatus = success(); + if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + return op.emitError("unhandled clauses for translation to LLVM IR"); + } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ + LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(moduleTranslation, allocaIP); + builder.restoreIP(codegenIP); + convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + }; + + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + return bodyGenStatus; +} + /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, @@ -2406,6 +2424,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::SingleOp op) { return convertOmpSingle(op, builder, moduleTranslation); }) + .Case([&](omp::TeamsOp op) { + return convertOmpTeams(op, builder, moduleTranslation); + }) .Case([&](omp::TaskOp op) { return convertOmpTaskOp(op, builder, moduleTranslation); }) diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir new file mode 100644 index 000000000000000..c9005fca94a7c20 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -0,0 +1,136 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @foo() + +// CHECK-LABEL: @omp_teams_simple +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[wrapperfn:.+]]) +// CHECK: ret void +llvm.func @omp_teams_simple() { + omp.teams { + llvm.call @foo() : () -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[outlinedfn:.+]]() +// CHECK: call void @foo() +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr %[[global_tid:.+]], ptr %[[bound_tid:.+]]) +// CHECK: call void @[[outlinedfn]] +// CHECK: ret void + +// ----- + +llvm.func @foo(i32) -> () + +// CHECK-LABEL: @omp_teams_shared_simple +// CHECK-SAME: (i32 [[arg0:%.+]]) +// CHECK: [[structArg:%.+]] = alloca { i32 } +// CHECK: br +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: store i32 [[arg0]], ptr [[gep]] +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:.+]], ptr [[structArg]]) +// CHECK: ret void +llvm.func @omp_teams_shared_simple(%arg0: i32) { + omp.teams { + llvm.call 
@foo(%arg0) : (i32) -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void [[outlinedfn:@.+]](ptr [[structArg:%.+]]) +// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 +// CHECK: [[loadgep:%.+]] = load i32, ptr [[gep]] +// CHECK: call void @foo(i32 [[loadgep]]) +// CHECK: ret void +// CHECK: define void [[wrapperfn]](ptr [[global_tid:.+]], ptr [[bound_tid:.+]], ptr [[structArg:.+]]) +// CHECK: call void [[outlinedfn]](ptr [[structArg]]) +// CHECK: ret void + +// ----- + +llvm.func @my_alloca_fn() -> !llvm.ptr +llvm.func @foo(i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () +llvm.func @bar() + +// CHECK-LABEL: @omp_teams_branching_shared +// CHECK-SAME: (i1 [[condition:%.+]], i32 [[arg0:%.+]], float [[arg1:%.+]], ptr [[arg2:%.+]], fp128 [[arg3:%.+]]) + +// Checking that the allocation for struct argument happens in the alloca block. +// CHECK: [[structArg:%.+]] = alloca { i1, i32, float, ptr, fp128, ptr, i32 } +// CHECK: [[allocated:%.+]] = call ptr @my_alloca_fn() +// CHECK: [[loaded:%.+]] = load i32, ptr [[allocated]] +// CHECK: br label + +// Checking that the shared values are stored properly in the struct arg. +// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]] +// CHECK: store i1 [[condition]], ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 1 +// CHECK: store i32 [[arg0]], ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 2 +// CHECK: store float [[arg1]], ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 3 +// CHECK: store ptr [[arg2]], ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 4 +// CHECK: store fp128 [[arg3]], ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 5 +// CHECK: store ptr [[allocated]], ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 6 +// CHECK: store i32 [[loaded]], ptr [[loadedPtr]] + +// Runtime call. +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:@.+]], ptr [[structArg]]) +// CHECK: br label +// CHECK: call void @bar() +// CHECK: ret void +llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %arg2: !llvm.ptr, %arg3: f128) { + %allocated = llvm.call @my_alloca_fn(): () -> !llvm.ptr + %loaded = llvm.load %allocated : !llvm.ptr + llvm.br ^codegenBlock +^codegenBlock: + omp.teams { + llvm.cond_br %condition, ^true_block, ^false_block + ^true_block: + llvm.call @foo(%arg0, %arg1, %arg2, %arg3, %allocated, %loaded) : (i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () + llvm.br ^exit + ^false_block: + llvm.br ^exit + ^exit: + omp.terminator + } + llvm.call @bar() : () -> () + llvm.return +} + +// Check the outlined function. 
+// CHECK: define internal void [[outlinedfn:@.+]](ptr [[data:%.+]]) +// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[data]] +// CHECK: [[condition:%.+]] = load i1, ptr [[conditionPtr]] +// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 1 +// CHECK: [[arg0:%.+]] = load i32, ptr [[arg0ptr]] +// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 2 +// CHECK: [[arg1:%.+]] = load float, ptr [[arg1ptr]] +// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 3 +// CHECK: [[arg2:%.+]] = load ptr, ptr [[arg2ptr]] +// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 4 +// CHECK: [[arg3:%.+]] = load fp128, ptr [[arg3ptr]] +// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 5 +// CHECK: [[allocated:%.+]] = load ptr, ptr [[allocatedPtr]] +// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 6 +// CHECK: [[loaded:%.+]] = load i32, ptr [[loadedPtr]] +// CHECK: br label + +// CHECK: br i1 [[condition]], label %[[true:.+]], label %[[false:.+]] +// CHECK: [[false]]: +// CHECK-NEXT: br label +// CHECK: [[true]]: +// CHECK: call void @foo(i32 [[arg0]], float [[arg1]], ptr [[arg2]], fp128 [[arg3]], ptr [[allocated]], i32 [[loaded]]) +// CHECK-NEXT: br label +// CHECK: ret void + +// Check the wrapper function +// CHECK: define void [[wrapperfn]](ptr [[globalTID:%.+]], ptr [[boundTID:%.+]], ptr [[data:%.+]]) +// CHECK: call void [[outlinedfn]](ptr [[data]]) +// CHECK: ret void >From 4618cca227680ed3487961560f9419050890db55 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 20:48:45 -0500 Subject: [PATCH 2/3] Formatting --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b9643be40e13c01..9eefd1be8ad070e 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -662,20 +662,27 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, } // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder -static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { +static LogicalResult +convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; LogicalResult bodyGenStatus = success(); - if(op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || op.getThreadLimit() || !op.getAllocatorsVars().empty() || op.getReductions()) { + if (op.getNumTeamsLower() || op.getNumTeamsUpper() || op.getIfExpr() || + op.getThreadLimit() || !op.getAllocatorsVars().empty() || + op.getReductions()) { return op.emitError("unhandled clauses for translation to LLVM IR"); } - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP){ - LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(moduleTranslation, allocaIP); + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( + moduleTranslation, allocaIP); builder.restoreIP(codegenIP); - convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation, bodyGenStatus); + convertOmpOpRegions(op.getRegion(), 
"omp.teams.region", builder, + moduleTranslation, bodyGenStatus); }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); + builder.restoreIP( + moduleTranslation.getOpenMPBuilder()->createTeams(ompLoc, bodyCB)); return bodyGenStatus; } >From d28e24d4c3bcb476d75c4f9fb906c5a4b294d340 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Tue, 3 Oct 2023 13:18:48 -0500 Subject: [PATCH 3/3] Addressed comments --- mlir/test/Target/LLVMIR/openmp-teams.mlir | 110 +++++++++++----------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir index c9005fca94a7c20..16457e88774b93a 100644 --- a/mlir/test/Target/LLVMIR/openmp-teams.mlir +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -3,7 +3,7 @@ llvm.func @foo() // CHECK-LABEL: @omp_teams_simple -// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[wrapperfn:.+]]) +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[WRAPPER_FN:.+]]) // CHECK: ret void llvm.func @omp_teams_simple() { omp.teams { @@ -13,11 +13,11 @@ llvm.func @omp_teams_simple() { llvm.return } -// CHECK: define internal void @[[outlinedfn:.+]]() +// CHECK: define internal void @[[OUTLINED_FN:.+]]() // CHECK: call void @foo() // CHECK: ret void -// CHECK: define void [[wrapperfn]](ptr %[[global_tid:.+]], ptr %[[bound_tid:.+]]) -// CHECK: call void @[[outlinedfn]] +// CHECK: define void [[WRAPPER_FN]](ptr {{.+}}, ptr {{.+}}) +// CHECK: call void @[[OUTLINED_FN]] // CHECK: ret void // ----- @@ -25,12 +25,12 @@ llvm.func @omp_teams_simple() { llvm.func @foo(i32) -> () // CHECK-LABEL: @omp_teams_shared_simple -// CHECK-SAME: (i32 [[arg0:%.+]]) -// CHECK: [[structArg:%.+]] = alloca { i32 } +// CHECK-SAME: (i32 [[ARG0:%.+]]) +// CHECK: [[STRUCT_ARG:%.+]] = alloca { i32 } // CHECK: br -// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 -// CHECK: store i32 [[arg0]], ptr [[gep]] -// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:.+]], ptr [[structArg]]) +// CHECK: [[GEP:%.+]] = getelementptr { i32 }, ptr [[STRUCT_ARG]], i32 0, i32 0 +// CHECK: store i32 [[ARG0]], ptr [[GEP]] +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[WRAPPER_FN:.+]], ptr [[STRUCT_ARG]]) // CHECK: ret void llvm.func @omp_teams_shared_simple(%arg0: i32) { omp.teams { @@ -40,13 +40,13 @@ llvm.func @omp_teams_shared_simple(%arg0: i32) { llvm.return } -// CHECK: define internal void [[outlinedfn:@.+]](ptr [[structArg:%.+]]) -// CHECK: [[gep:%.+]] = getelementptr { i32 }, ptr [[structArg]], i32 0, i32 0 -// CHECK: [[loadgep:%.+]] = load i32, ptr [[gep]] -// CHECK: call void @foo(i32 [[loadgep]]) +// CHECK: define internal void [[OUTLINED_FN:@.+]](ptr [[STRUCT_ARG:%.+]]) +// CHECK: [[GEP:%.+]] = getelementptr { i32 }, ptr [[STRUCT_ARG]], i32 0, i32 0 +// CHECK: [[LOAD_GEP:%.+]] = load i32, ptr [[GEP]] +// CHECK: call void @foo(i32 [[LOAD_GEP]]) // CHECK: ret void -// CHECK: define void [[wrapperfn]](ptr [[global_tid:.+]], ptr [[bound_tid:.+]], ptr [[structArg:.+]]) -// CHECK: call void [[outlinedfn]](ptr [[structArg]]) +// CHECK: define void [[WRAPPER_FN]](ptr {{.+}}, ptr {{.+}}, ptr [[STRUCT_ARG:.+]]) +// CHECK: call void [[OUTLINED_FN]](ptr [[STRUCT_ARG]]) // CHECK: ret void // ----- @@ -56,32 +56,32 @@ llvm.func @foo(i32, f32, !llvm.ptr, f128, !llvm.ptr, i32) -> () llvm.func @bar() // CHECK-LABEL: 
@omp_teams_branching_shared -// CHECK-SAME: (i1 [[condition:%.+]], i32 [[arg0:%.+]], float [[arg1:%.+]], ptr [[arg2:%.+]], fp128 [[arg3:%.+]]) +// CHECK-SAME: (i1 [[CONDITION:%.+]], i32 [[ARG0:%.+]], float [[ARG1:%.+]], ptr [[ARG2:%.+]], fp128 [[ARG3:%.+]]) // Checking that the allocation for struct argument happens in the alloca block. -// CHECK: [[structArg:%.+]] = alloca { i1, i32, float, ptr, fp128, ptr, i32 } -// CHECK: [[allocated:%.+]] = call ptr @my_alloca_fn() -// CHECK: [[loaded:%.+]] = load i32, ptr [[allocated]] +// CHECK: [[STRUCT_ARG:%.+]] = alloca { i1, i32, float, ptr, fp128, ptr, i32 } +// CHECK: [[ALLOCATED:%.+]] = call ptr @my_alloca_fn() +// CHECK: [[LOADED:%.+]] = load i32, ptr [[ALLOCATED]] // CHECK: br label // Checking that the shared values are stored properly in the struct arg. -// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]] -// CHECK: store i1 [[condition]], ptr [[conditionPtr]] -// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 1 -// CHECK: store i32 [[arg0]], ptr [[arg0ptr]] -// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 2 -// CHECK: store float [[arg1]], ptr [[arg1ptr]] -// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 3 -// CHECK: store ptr [[arg2]], ptr [[arg2ptr]] -// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 4 -// CHECK: store fp128 [[arg3]], ptr [[arg3ptr]] -// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 5 -// CHECK: store ptr [[allocated]], ptr [[allocatedPtr]] -// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[structArg]], i32 0, i32 6 -// CHECK: store i32 [[loaded]], ptr [[loadedPtr]] +// CHECK: [[CONDITION_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]] +// CHECK: store i1 [[CONDITION]], ptr [[CONDITION_PTR]] +// CHECK: [[ARG0_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]], i32 0, i32 1 +// CHECK: store i32 [[ARG0]], ptr [[ARG0_PTR]] +// CHECK: [[ARG1_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]], i32 0, i32 2 +// CHECK: store float [[ARG1]], ptr [[ARG1_PTR]] +// CHECK: [[ARG2_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]], i32 0, i32 3 +// CHECK: store ptr [[ARG2]], ptr [[ARG2_PTR]] +// CHECK: [[ARG3_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]], i32 0, i32 4 +// CHECK: store fp128 [[ARG3]], ptr [[ARG3_PTR]] +// CHECK: [[ALLOCATED_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]], i32 0, i32 5 +// CHECK: store ptr [[ALLOCATED]], ptr [[ALLOCATED_PTR]] +// CHECK: [[LOADED_PTR:%.+]] = getelementptr {{.+}}, ptr [[STRUCT_ARG]], i32 0, i32 6 +// CHECK: store i32 [[LOADED]], ptr [[LOADED_PTR]] // Runtime call. -// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[wrapperfn:@.+]], ptr [[structArg]]) +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[WRAPPER_FN:@.+]], ptr [[STRUCT_ARG]]) // CHECK: br label // CHECK: call void @bar() // CHECK: ret void @@ -105,32 +105,32 @@ llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %a } // Check the outlined function. 
-// CHECK: define internal void [[outlinedfn:@.+]](ptr [[data:%.+]]) -// CHECK: [[conditionPtr:%.+]] = getelementptr {{.+}}, ptr [[data]] -// CHECK: [[condition:%.+]] = load i1, ptr [[conditionPtr]] -// CHECK: [[arg0ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 1 -// CHECK: [[arg0:%.+]] = load i32, ptr [[arg0ptr]] -// CHECK: [[arg1ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 2 -// CHECK: [[arg1:%.+]] = load float, ptr [[arg1ptr]] -// CHECK: [[arg2ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 3 -// CHECK: [[arg2:%.+]] = load ptr, ptr [[arg2ptr]] -// CHECK: [[arg3ptr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 4 -// CHECK: [[arg3:%.+]] = load fp128, ptr [[arg3ptr]] -// CHECK: [[allocatedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 5 -// CHECK: [[allocated:%.+]] = load ptr, ptr [[allocatedPtr]] -// CHECK: [[loadedPtr:%.+]] = getelementptr {{.+}}, ptr [[data]], i32 0, i32 6 -// CHECK: [[loaded:%.+]] = load i32, ptr [[loadedPtr]] +// CHECK: define internal void [[OUTLINED_FN:@.+]](ptr [[DATA:%.+]]) +// CHECK: [[CONDITION_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]] +// CHECK: [[CONDITION:%.+]] = load i1, ptr [[CONDITION_PTR]] +// CHECK: [[ARG0_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 1 +// CHECK: [[ARG0:%.+]] = load i32, ptr [[ARG0_PTR]] +// CHECK: [[ARG1_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 2 +// CHECK: [[ARG1:%.+]] = load float, ptr [[ARG1_PTR]] +// CHECK: [[ARG2_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 3 +// CHECK: [[ARG2:%.+]] = load ptr, ptr [[ARG2_PTR]] +// CHECK: [[ARG3_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 4 +// CHECK: [[ARG3:%.+]] = load fp128, ptr [[ARG3_PTR]] +// CHECK: [[ALLOCATED_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 5 +// CHECK: [[ALLOCATED:%.+]] = load ptr, ptr [[ALLOCATED_PTR]] +// CHECK: [[LOADED_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 6 +// CHECK: [[LOADED:%.+]] = load i32, ptr [[LOADED_PTR]] // CHECK: br label -// CHECK: br i1 [[condition]], label %[[true:.+]], label %[[false:.+]] -// CHECK: [[false]]: +// CHECK: br i1 [[CONDITION]], label %[[TRUE:.+]], label %[[FALSE:.+]] +// CHECK: [[FALSE]]: // CHECK-NEXT: br label -// CHECK: [[true]]: -// CHECK: call void @foo(i32 [[arg0]], float [[arg1]], ptr [[arg2]], fp128 [[arg3]], ptr [[allocated]], i32 [[loaded]]) +// CHECK: [[TRUE]]: +// CHECK: call void @foo(i32 [[ARG0]], float [[ARG1]], ptr [[ARG2]], fp128 [[ARG3]], ptr [[ALLOCATED]], i32 [[LOADED]]) // CHECK-NEXT: br label // CHECK: ret void // Check the wrapper function -// CHECK: define void [[wrapperfn]](ptr [[globalTID:%.+]], ptr [[boundTID:%.+]], ptr [[data:%.+]]) -// CHECK: call void [[outlinedfn]](ptr [[data]]) +// CHECK: define void [[WRAPPER_FN]](ptr {{.+}}, ptr {{.+}}, ptr [[DATA:%.+]]) +// CHECK: call void [[OUTLINED_FN]](ptr [[DATA]]) // CHECK: ret void From lldb-commits at lists.llvm.org Tue Oct 3 11:31:50 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 11:31:50 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c5e16.a70a0220.aadbe.4a5f@mx.google.com> https://github.com/junior-jl updated https://github.com/llvm/llvm-project/pull/67019 >From 6de148adcdd1eedea7e23b4e267c6f42bb68bc45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Tue, 3 Oct 2023 15:28:45 -0300 Subject: [PATCH] [lldb] add 
stop-at-user-entry option to process launch --- lldb/include/lldb/Target/Language.h | 4 ++ lldb/include/lldb/Target/Target.h | 2 + .../Commands/CommandOptionsProcessLaunch.cpp | 13 +++++- lldb/source/Commands/Options.td | 4 ++ .../Language/CPlusPlus/CPlusPlusLanguage.h | 2 + .../Plugins/Language/ObjC/ObjCLanguage.h | 2 + .../ObjCPlusPlus/ObjCPlusPlusLanguage.h | 2 + lldb/source/Target/Target.cpp | 42 +++++++++++++++++++ .../command-process-launch-user-entry.test | 8 ++++ 9 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 lldb/test/Shell/Commands/command-process-launch-user-entry.test diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index a6b9ccaf31b3c42..d53089ba4a59974 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -160,6 +160,10 @@ class Language : public PluginInterface { virtual lldb::LanguageType GetLanguageType() const = 0; + // Implement this function to return the user-defined entry point name + // for the language + virtual llvm::StringRef GetUserEntryPointName() const { return {}; } + virtual bool IsTopLevelFunction(Function &function); virtual bool IsSourceFile(llvm::StringRef file_path) const = 0; diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index e9e531d0e12640a..82a343ee03fb516 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -654,6 +654,8 @@ class Target : public std::enable_shared_from_this, lldb::BreakpointSP GetBreakpointByID(lldb::break_id_t break_id); + lldb::BreakpointSP CreateBreakpointAtUserEntry(); + // Use this to create a file and line breakpoint to a given module or all // module it is nullptr lldb::BreakpointSP CreateBreakpoint(const FileSpecList *containingModules, diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 85ad8ff5e07132c..3055e4ca45bd230 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -8,6 +8,7 @@ #include "CommandOptionsProcessLaunch.h" +#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" @@ -15,11 +16,13 @@ #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" - #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; using namespace lldb; @@ -38,7 +41,13 @@ Status CommandOptionsProcessLaunch::SetOptionValue( case 's': // Stop at program entry point launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; - + case 'm': // Stop at user entry point + { + TargetSP target_sp = + execution_context ? 
execution_context->GetTargetSP() : TargetSP(); + target_sp->CreateBreakpointAtUserEntry(); + break; + } case 'i': // STDIN for read only { FileAction action; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..dd4cf5c4dc043e7 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -675,6 +675,10 @@ let Command = "platform shell" in { let Command = "process launch" in { def process_launch_stop_at_entry : Option<"stop-at-entry", "s">, Desc<"Stop at the entry point of the program when launching a process.">; + def process_launch_stop_at_user_entry : Option<"stop-at-user-entry", "m">, + Desc<"Stop at the user entry point when launching a process. For C based " + "languages this will be the 'main' function, but this might differ for " + "other languages.">; def process_launch_disable_aslr : Option<"disable-aslr", "A">, Arg<"Boolean">, Desc<"Set whether to disable address space layout randomization when launching a process.">; def process_launch_plugin : Option<"plugin", "P">, Arg<"Plugin">, diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 7712a60b7795951..623d481bf117f48 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -103,6 +103,8 @@ class CPlusPlusLanguage : public Language { return lldb::eLanguageTypeC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + std::unique_ptr GetTypeScavenger() override; lldb::TypeCategoryImplSP GetFormatters() override; diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index bb8057846bb7c30..a50f4b036108d7a 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -127,6 +127,8 @@ class ObjCLanguage : public Language { return lldb::eLanguageTypeObjC; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + // Get all possible names for a method. 
Examples: // If method_name is "+[NSString(my_additions) myStringWithCString:]" // variant_names[0] => "+[NSString myStringWithCString:]" diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h index b7c71b5dbb1c991..1beab9348eb72e8 100644 --- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h @@ -27,6 +27,8 @@ class ObjCPlusPlusLanguage : public Language { return lldb::eLanguageTypeObjC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + llvm::StringRef GetNilReferenceSummaryString() override { return "nil"; } bool IsSourceFile(llvm::StringRef file_path) const override; diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index f197b1b1aa097c3..013d72bda6308be 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector<std::string, std::vector<std::string>, + std::unordered_set<std::string>> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); + return lldb::BreakpointSP(); + } + std::string entryPointName = lang->GetUserEntryPointName().str(); + if (!entryPointName.empty()) + entryPointNamesSet.insert(entryPointName); + } + if (entryPointNamesSet.empty()) { + error.SetErrorString("No entry point name found\n"); + return lldb::BreakpointSP(); + } + BreakpointSP bp_sp = + target_sp->CreateBreakpoint(&shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware + ); + if (!bp_sp) { + error.SetErrorString("Breakpoint creation failed.\n"); + return lldb::BreakpointSP(); + } + bp_sp->SetOneShot(true); + return bp_sp; +} + BreakpointSP Target::CreateSourceRegexBreakpoint( const FileSpecList *containingModules, const FileSpecList *source_file_spec_list, diff --git a/lldb/test/Shell/Commands/command-process-launch-user-entry.test b/lldb/test/Shell/Commands/command-process-launch-user-entry.test new file mode 100644 index 000000000000000..32ef710fe567439 --- /dev/null +++ b/lldb/test/Shell/Commands/command-process-launch-user-entry.test @@ -0,0 +1,8 @@ +# RUN: %clang_host -g %S/Inputs/main.c -o %t +# RUN: %lldb %t -s %s -o exit | FileCheck %s + +process launch -m +# CHECK-LABEL: process launch -m +# CHECK: Process {{.*}} stopped +# CHECK: stop reason = one-shot breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c \ No newline at end of file From lldb-commits at lists.llvm.org Tue Oct 3 11:34:01 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 11:34:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c5e99.a70a0220.4d723.4a1f@mx.google.com> ================ @@ -95,21 +95,24 @@ class Language : 
public PluginInterface { class EitherTypeScavenger : public TypeScavenger { public: EitherTypeScavenger() : TypeScavenger() { - for (std::shared_ptr<TypeScavenger> scavenger : { std::shared_ptr<TypeScavenger>(new ScavengerTypes())... }) { + for (std::shared_ptr<TypeScavenger> scavenger : + {std::shared_ptr<TypeScavenger>(new ScavengerTypes())...}) { ---------------- junior-jl wrote: Yes, that was indeed the case. I believe I removed all the unrelated modifications from the PR in this last push. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 12:05:45 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Tue, 03 Oct 2023 12:05:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651c6609.170a0220.c7e66.586b@mx.google.com> https://github.com/yinying-lisa-li updated https://github.com/llvm/llvm-project/pull/68130 >From 47b34bb327e1078678d3ba0c96ebce3fc89cf2ae Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 16:43:50 +0000 Subject: [PATCH 1/2] [mlir][sparse] Print new syntax Printing changes from #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> to map = (d0) -> (d0 : compressed). Level properties, ELL and slice are also supported. --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 20 +-- .../SparseTensor/IR/SparseTensorDialect.cpp | 64 ++++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 +- .../SparseTensor/roundtrip_encoding.mlir | 32 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 8 +- .../SparseTensor/sparse_tensor_reshape.mlir | 2 +- .../python/dialects/sparse_tensor/dialect.py | 160 +++++++++--------- 7 files changed, 159 insertions(+), 135 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index bc351ec52c0946b..2920ef79f461c6a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -215,29 +215,29 @@ constexpr const char *toMLIRString(DimLevelType dlt) { case DimLevelType::Compressed: return "compressed"; case DimLevelType::CompressedNu: - return "compressed_nu"; + return "compressed(nonunique)"; case DimLevelType::CompressedNo: - return "compressed_no"; + return "compressed(nonordered)"; case DimLevelType::CompressedNuNo: - return "compressed_nu_no"; + return "compressed(nonunique, nonordered)"; case DimLevelType::Singleton: return "singleton"; case DimLevelType::SingletonNu: - return "singleton_nu"; + return "singleton(nonunique)"; case DimLevelType::SingletonNo: - return "singleton_no"; + return "singleton(nonordered)"; case DimLevelType::SingletonNuNo: - return "singleton_nu_no"; + return "singleton(nonunique, nonordered)"; case DimLevelType::LooseCompressed: return "loose_compressed"; case DimLevelType::LooseCompressedNu: - return "loose_compressed_nu"; + return "loose_compressed(nonunique)"; case DimLevelType::LooseCompressedNo: - return "loose_compressed_no"; + return "loose_compressed(nonordered)"; case DimLevelType::LooseCompressedNuNo: - return "loose_compressed_nu_no"; + return "loose_compressed(nonunique, nonordered)"; case DimLevelType::TwoOutOfFour: - return "compressed24"; + return "block2_4"; } return ""; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 3897e1b9ea3597c..4c8dccdda6c0c7c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -586,30 +586,56 @@ Attribute 
SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. - printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast<AffineMap>(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // An empty affine map indicates the identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. + printer << "<{ map = "; + // Symbolic identifiers. + if (map.getNumSymbols() != 0) { + printer << '['; + for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) + printer << 's' << i << ", "; + if (map.getNumSymbols() >= 1) + printer << 's' << map.getNumSymbols() - 1; + printer << ']'; + } + // Dimension identifiers. + printer << '('; + auto dimSlices = getDimSlices(); + if (!dimSlices.empty()) { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << " : " << dimSlices[i] << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1 << " : " + << dimSlices[map.getNumDims() - 1]; + } else { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1; + } + printer << ')'; + // Level format and properties. + printer << " -> ("; + for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { + map.getResult(i).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; + } + if (map.getNumResults() >= 1) { + auto lastIndex = map.getNumResults() - 1; + map.getResult(lastIndex).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); + } + printer << ')'; // Print remaining members only for non-default values. - if (!isIdentity()) - printer << ", dimToLvl = affine_map<" << getDimToLvl() << ">"; if (getPosWidth()) printer << ", posWidth = " << getPosWidth(); if (getCrdWidth()) printer << ", crdWidth = " << getCrdWidth(); - if (!getDimSlices().empty()) { - printer << ", dimSlices = [ "; - llvm::interleaveComma(getDimSlices(), printer, - [&](SparseTensorDimSliceAttr attr) { - // Calls SparseTensorDimSliceAttr::print directly to - // skip mnemonic. 
- attr.print(printer); - }); - printer << " ]"; - } - printer << " }>"; } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 69a9c274a861ce1..c3b16807a7c18a6 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index 39e3ef102423524..c4ef50bee01ea2c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // CHECK-LABEL: func private @sparse_1d_tensor( -// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] 
}>>) +// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) // ----- @@ -13,7 +13,7 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map }> // CHECK-LABEL: func private @sparse_csr( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>) func.func private @sparse_csr(tensor) // ----- @@ -23,7 +23,7 @@ func.func private @sparse_csr(tensor) }> // CHECK-LABEL: func private @CSR_explicit( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : compressed) }>> func.func private @CSR_explicit(%arg0: tensor) { return } @@ -37,7 +37,7 @@ func.func private @CSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @sparse_csc( -// CHECK-SAME: tensor (d1, d0)> }>>) +// CHECK-SAME: tensor (d1 : dense, d0 : compressed) }>>) func.func private @sparse_csc(tensor) // ----- @@ -49,7 +49,7 @@ func.func private @sparse_csc(tensor) }> // CHECK-LABEL: func private @sparse_dcsc( -// CHECK-SAME: tensor (d1, d0)>, crdWidth = 64 }>>) +// CHECK-SAME: tensor (d1 : compressed, d0 : compressed), crdWidth = 64 }>>) func.func private @sparse_dcsc(tensor) // ----- @@ -59,7 +59,7 @@ func.func private @sparse_dcsc(tensor) }> // CHECK-LABEL: func private @sparse_coo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>) func.func private @sparse_coo(tensor) // ----- @@ -69,7 +69,7 @@ func.func private @sparse_coo(tensor) }> // CHECK-LABEL: func private @sparse_bcoo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>) func.func private @sparse_bcoo(tensor) // ----- @@ -79,7 +79,7 @@ func.func private @sparse_bcoo(tensor) }> // CHECK-LABEL: func private @sparse_sorted_coo( -// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed_nu", "singleton" ] }>>) +// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) // ----- @@ -94,7 +94,7 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) }> // CHECK-LABEL: func private @sparse_bcsr( -// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed", "dense", "dense" ], dimToLvl = affine_map<(d0, d1) -> (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) @@ -105,7 +105,7 @@ func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) }> // CHECK-LABEL: func private @sparse_ell( -// CHECK-SAME: tensor (d0 * (s0 * 4), d0, d1)> }>> +// CHECK-SAME: tensor (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>> func.func private @sparse_ell(tensor) // ----- @@ -115,7 +115,7 @@ func.func private @sparse_ell(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -125,7 +125,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, 
d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -138,7 +138,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_2_out_of_4( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : block2_4) }>> func.func private @sparse_2_out_of_4(tensor) // ----- @@ -153,7 +153,7 @@ func.func private @sparse_2_out_of_4(tensor) }> // CHECK-LABEL: func private @BCSR( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR(%arg0: tensor) { return } @@ -174,7 +174,7 @@ func.func private @BCSR(%arg0: tensor) { }> // CHECK-LABEL: func private @BCSR_explicit( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR_explicit(%arg0: tensor) { return } @@ -190,7 +190,7 @@ func.func private @BCSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0, d1 floordiv 4, d1 mod 4)> }>> +// CHECK-SAME: tensor (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4) }>> func.func private @NV_24(%arg0: tensor) { return } diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 7f8edac15302616..3a2376f75654af9 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -16,7 +16,7 @@ // CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -53,7 +53,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -99,7 +99,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-ROUND: return %[[E]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -142,7 +142,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-ROUND: return %[[C]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir index 9368cc71c5faa42..e0111c89df65a2d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir +++ 
b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir @@ -4,7 +4,7 @@ #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK: func.func @sparse_reshape( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C25:.*]] = arith.constant 25 : index // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index e1048edce184a51..6d15363fb17118d 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,95 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. - # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. 
+ # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get( - casted.lvl_types, casted.dim_to_lvl, 8, 32 - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" - ) + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + ) + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == 
encoding >From 2be69066192995ff171e08a54f7c7fdd3e35ab44 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 18:39:17 +0000 Subject: [PATCH 2/2] format --- .../python/dialects/sparse_tensor/dialect.py | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index 6d15363fb17118d..d80b878323377a4 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,93 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. - # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. 
+ # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" + ) ) - ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding From lldb-commits at lists.llvm.org Tue Oct 3 12:09:08 
2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Tue, 03 Oct 2023 12:09:08 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651c66d4.170a0220.346f4.4ee9@mx.google.com> https://github.com/yinying-lisa-li edited https://github.com/llvm/llvm-project/pull/68130 From lldb-commits at lists.llvm.org Tue Oct 3 12:43:30 2023 From: lldb-commits at lists.llvm.org (walter erquinigo via lldb-commits) Date: Tue, 03 Oct 2023 12:43:30 -0700 (PDT) Subject: [Lldb-commits] [lldb] 3674a06 - [NFC][LLDB] Remove an unnecessary virtual destructor Message-ID: <651c6ee2.a70a0220.ec9e1.523a@mx.google.com> Author: walter erquinigo Date: 2023-10-03T15:43:22-04:00 New Revision: 3674a0610a71becc54eab715905c4017d65471c4 URL: https://github.com/llvm/llvm-project/commit/3674a0610a71becc54eab715905c4017d65471c4 DIFF: https://github.com/llvm/llvm-project/commit/3674a0610a71becc54eab715905c4017d65471c4.diff LOG: [NFC][LLDB] Remove an unnecessary virtual destructor DWARFExpression has a virtual destructor but no other virtual methods, so we can safely remove the virtual keyword. Added: Modified: lldb/include/lldb/Expression/DWARFExpression.h Removed: ################################################################################ diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 380910ba0ea3d61..5e03f539a272cac 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -45,7 +45,7 @@ class DWARFExpression { DWARFExpression(const DataExtractor &data); /// Destructor - virtual ~DWARFExpression(); + ~DWARFExpression(); /// Return true if the location expression contains data bool IsValid() const; From lldb-commits at lists.llvm.org Tue Oct 3 12:58:18 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Tue, 03 Oct 2023 12:58:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) Message-ID: https://github.com/walter-erquinigo created https://github.com/llvm/llvm-project/pull/68150 As a followup of https://github.com/llvm/llvm-project/pull/67851, I'm defining a new namespace `lldb_plugin::dwarf` for the classes in this Plugins/SymbolFile/DWARF folder. This change is very NFC and helped me with exporting the necessary symbols for my out-of-tree language plugin. The only two classes that I didn't change are DWARFDataExtractor, because that's being explicitly exported as part of lldb_private in `lldb-forward.h` , and the ClangDWARFASTParser, because that shouldn't be in the same namespace as the generic language-agnostic dwarf parser, but I'm okay with changing that. In any case, even if I didn't need this for my work, adding this namespace could be considered a good practice. >From c8886aa7bf978ef35a7bedd1dc34dda7d8f840b3 Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Mon, 2 Oct 2023 16:56:16 -0400 Subject: [PATCH] [LLDB][NFC] Create a namespace for the DWARF plugin As a followup of https://github.com/llvm/llvm-project/pull/67851, I'm defining a new namespace `lldb_plugin::dwarf` for the classes in this Plugins/SymbolFile/DWARF folder. This change is very NFC and helped me with exporting the necessary symbols for my out-of-tree language plugin. 
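To make the pattern concrete, here is a minimal sketch of the kind of mechanical change applied throughout Plugins/SymbolFile/DWARF. This is illustrative only, not an excerpt from the patch; the free function `Inspect` is invented for the example.

// Before: plugin types sat in the global namespace, so any forward
// declaration outside the plugin had to be global as well.
// class DWARFUnit;

// After: declarations are wrapped in the plugin namespace, and
// consumers forward-declare and qualify the types there.
namespace lldb_plugin::dwarf {
class DWARFUnit;
} // namespace lldb_plugin::dwarf

// Call sites outside the namespace now spell the type explicitly
// (hypothetical consumer, not part of the patch):
void Inspect(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu);

Because the wrapping is purely a renaming at the symbol level, call sites only gain a namespace qualifier; no behavior changes, which is what makes the patch NFC.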
The only two classes that I didn't change are DWARFDataExtractor, because that's being explicitly exported as part of lldb_private in `lldb-forward.h` , and the ClangDWARFASTParser, because that shouldn't be in the same namespace as the generic language-agnostic dwarf parser, but I'm okay with changing that. In any case, even if I didn't need this for my work, adding this namespace could be considered a good practice. --- .../include/lldb/Expression/DWARFExpression.h | 24 ++- .../lldb/Expression/DWARFExpressionList.h | 9 +- lldb/include/lldb/Symbol/TypeSystem.h | 8 +- lldb/source/Expression/DWARFExpression.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.h | 35 ++-- .../Plugins/SymbolFile/DWARF/DIERef.cpp | 1 + lldb/source/Plugins/SymbolFile/DWARF/DIERef.h | 7 +- .../SymbolFile/DWARF/DWARFASTParser.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 5 +- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 2 + .../SymbolFile/DWARF/DWARFASTParserClang.h | 158 ++++++++++-------- .../SymbolFile/DWARF/DWARFAttribute.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFAttribute.h | 3 + .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp | 5 +- .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.h | 2 + .../SymbolFile/DWARF/DWARFCompileUnit.cpp | 1 + .../SymbolFile/DWARF/DWARFCompileUnit.h | 2 + .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFContext.h | 48 +++--- .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFDIE.h | 2 + .../SymbolFile/DWARF/DWARFDataExtractor.h | 2 +- .../SymbolFile/DWARF/DWARFDebugAbbrev.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugAbbrev.h | 2 + .../SymbolFile/DWARF/DWARFDebugArangeSet.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugArangeSet.h | 2 + .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugAranges.h | 2 + .../SymbolFile/DWARF/DWARFDebugInfo.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h | 12 +- .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugInfoEntry.h | 2 + .../SymbolFile/DWARF/DWARFDebugMacro.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugMacro.h | 6 +- .../SymbolFile/DWARF/DWARFDebugRanges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugRanges.h | 6 +- .../SymbolFile/DWARF/DWARFDeclContext.cpp | 1 + .../SymbolFile/DWARF/DWARFDeclContext.h | 2 + .../Plugins/SymbolFile/DWARF/DWARFDefines.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDefines.h | 4 +- .../SymbolFile/DWARF/DWARFFormValue.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFFormValue.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFIndex.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFIndex.h | 43 ++--- .../SymbolFile/DWARF/DWARFTypeUnit.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h | 2 + .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 17 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 3 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 39 ++--- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/ManualDWARFIndex.h | 37 ++-- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/NameToDIE.h | 5 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 10 +- .../SymbolFile/DWARF/SymbolFileDWARF.h | 12 +- .../DWARF/SymbolFileDWARFDebugMap.cpp | 3 + .../DWARF/SymbolFileDWARFDebugMap.h | 13 +- .../SymbolFile/DWARF/SymbolFileDWARFDwo.cpp | 1 + .../SymbolFile/DWARF/SymbolFileDWARFDwo.h | 9 +- .../SymbolFile/DWARF/UniqueDWARFASTType.cpp | 1 + .../SymbolFile/DWARF/UniqueDWARFASTType.h | 5 +- 
.../TypeSystem/Clang/TypeSystemClang.cpp | 1 + .../TypeSystem/Clang/TypeSystemClang.h | 2 +- 65 files changed, 357 insertions(+), 232 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 380910ba0ea3d61..0d57a153a8b8e61 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; +} // namespace lldb_plugin::dwarf namespace lldb_private { @@ -64,18 +66,21 @@ class DWARFExpression { /// \return /// The address specified by the operation, if the operation exists, or /// LLDB_INVALID_ADDRESS otherwise. - lldb::addr_t GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, - bool &error) const; + lldb::addr_t + GetLocation_DW_OP_addr(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + bool &error) const; - bool Update_DW_OP_addr(const DWARFUnit *dwarf_cu, lldb::addr_t file_addr); + bool Update_DW_OP_addr(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + lldb::addr_t file_addr); void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size, uint8_t addr_byte_size); - bool ContainsThreadLocalStorage(const DWARFUnit *dwarf_cu) const; + bool ContainsThreadLocalStorage( + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu) const; bool LinkThreadLocalStorage( - const DWARFUnit *dwarf_cu, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, std::function const &link_address_callback); @@ -128,15 +133,16 @@ class DWARFExpression { /// details of the failure are provided through it. static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, lldb::ModuleSP module_sp, const DataExtractor &opcodes, - const DWARFUnit *dwarf_cu, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_set, const Value *initial_value_ptr, const Value *object_address_ptr, Value &result, Status *error_ptr); - static bool ParseDWARFLocationList(const DWARFUnit *dwarf_cu, - const DataExtractor &data, - DWARFExpressionList *loc_list); + static bool + ParseDWARFLocationList(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + const DataExtractor &data, + DWARFExpressionList *loc_list); bool GetExpressionData(DataExtractor &data) const { data = m_data; diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h index c0939647056dcbf..c44ff71a7eef1bf 100644 --- a/lldb/include/lldb/Expression/DWARFExpressionList.h +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -13,7 +13,9 @@ #include "lldb/Utility/RangeMap.h" #include "lldb/lldb-private.h" +namespace lldb_plugin::dwarf { class DWARFUnit; +} // namespace lldb_plugin::dwarf namespace lldb_private { @@ -24,13 +26,14 @@ class DWARFExpressionList { public: DWARFExpressionList() = default; - DWARFExpressionList(lldb::ModuleSP module_sp, const DWARFUnit *dwarf_cu, + DWARFExpressionList(lldb::ModuleSP module_sp, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, lldb::addr_t func_file_addr) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu), m_func_file_addr(func_file_addr) {} DWARFExpressionList(lldb::ModuleSP module_sp, DWARFExpression expr, - const DWARFUnit *dwarf_cu) + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu) { AddExpression(0, LLDB_INVALID_ADDRESS, expr); } @@ -136,7 +139,7 @@ class DWARFExpressionList { /// The DWARF compile unit this expression belongs to. 
It is used to evaluate /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index, /// DW_OP_GNU_const_index) - const DWARFUnit *m_dwarf_cu = nullptr; + const lldb_plugin::dwarf::DWARFUnit *m_dwarf_cu = nullptr; // Function base file address. lldb::addr_t m_func_file_addr = LLDB_INVALID_ADDRESS; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index eb6e453e1aec0d0..4ef22a02d8c2d3f 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -28,8 +28,11 @@ #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/lldb-private.h" +namespace lldb_plugin::dwarf { class DWARFDIE; class DWARFASTParser; +} // namespace lldb_plugin::dwarf + class PDBASTParser; namespace lldb_private { @@ -93,7 +96,10 @@ class TypeSystem : public PluginInterface, /// removing all the TypeSystems from the TypeSystemMap. virtual void Finalize() {} - virtual DWARFASTParser *GetDWARFParser() { return nullptr; } + virtual lldb_plugin::dwarf::DWARFASTParser *GetDWARFParser() { + return nullptr; + } + virtual PDBASTParser *GetPDBParser() { return nullptr; } virtual npdb::PdbAstBuilder *GetNativePDBParser() { return nullptr; } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 93fcf0579be0b18..18528d78f1cf01b 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -45,6 +45,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; // DWARFExpression constructor DWARFExpression::DWARFExpression() : m_data() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 34fb98b5a9b690a..286fadfb2dcdaf6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; std::unique_ptr AppleDWARFIndex::Create( Module &module, DWARFDataExtractor apple_names, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h index 6b948e07989531e..ab7d4659e56cbe4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h @@ -12,15 +12,18 @@ #include "Plugins/SymbolFile/DWARF/DWARFIndex.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -namespace lldb_private { +namespace lldb_plugin::dwarf { class AppleDWARFIndex : public DWARFIndex { public: static std::unique_ptr - Create(Module &module, DWARFDataExtractor apple_names, - DWARFDataExtractor apple_namespaces, DWARFDataExtractor apple_types, - DWARFDataExtractor apple_objc, DWARFDataExtractor debug_str); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor apple_names, + lldb_private::DWARFDataExtractor apple_namespaces, + lldb_private::DWARFDataExtractor apple_types, + lldb_private::DWARFDataExtractor apple_objc, + lldb_private::DWARFDataExtractor debug_str); - AppleDWARFIndex(Module &module, + AppleDWARFIndex(lldb_private::Module &module, std::unique_ptr apple_names, std::unique_ptr apple_namespaces, std::unique_ptr apple_types, @@ -33,33 +36,33 @@ class AppleDWARFIndex : public DWARFIndex { void Preload() override {} void - GetGlobalVariables(ConstString basename, + 
GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: std::unique_ptr m_apple_names_up; @@ -77,6 +80,6 @@ class AppleDWARFIndex : public DWARFIndex { std::optional search_for_tag = std::nullopt, std::optional search_for_qualhash = std::nullopt); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_APPLEDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp index 88a5e6027557b9b..d4c6b043fec359c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp @@ -14,6 +14,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, StringRef Style) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index b5a5cfe263f7804..939336dab2be6e6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -14,6 +14,7 @@ #include #include +namespace lldb_plugin::dwarf { /// Identifies a DWARF debug info entry within a given Module. 
It contains three /// "coordinates": /// - file_index: identifies the separate stand alone debug info file @@ -131,10 +132,12 @@ class DIERef { static_assert(sizeof(DIERef) == 8); typedef std::vector DIEArray; +} // namespace lldb_plugin::dwarf namespace llvm { -template<> struct format_provider { - static void format(const DIERef &ref, raw_ostream &OS, StringRef Style); +template <> struct format_provider { + static void format(const lldb_plugin::dwarf::DIERef &ref, raw_ostream &OS, + StringRef Style); }; } // namespace llvm diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index a68b7cd110eb719..06be740ad398d7d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -18,6 +18,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; std::optional DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 18825ae060b12fe..cf9f4bf8cdf30e4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -17,11 +17,13 @@ #include "lldb/lldb-enumerations.h" #include -class DWARFDIE; namespace lldb_private { class CompileUnit; class ExecutionContext; } + +namespace lldb_plugin::dwarf { +class DWARFDIE; class SymbolFileDWARF; class DWARFASTParser { @@ -65,5 +67,6 @@ class DWARFASTParser { static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..817ae92e962b3d6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -60,6 +60,8 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; + DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast) : m_ast(ast), m_die_to_decl_ctx(), m_decl_ctx_to_die() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 88bfc490e890744..8a8c8f48af16f97 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -31,12 +31,14 @@ namespace lldb_private { class CompileUnit; } +namespace lldb_plugin::dwarf { class DWARFDebugInfoEntry; class SymbolFileDWARF; +} // namespace lldb_plugin::dwarf struct ParsedDWARFTypeAttributes; -class DWARFASTParserClang : public DWARFASTParser { +class DWARFASTParserClang : public lldb_plugin::dwarf::DWARFASTParser { public: DWARFASTParserClang(lldb_private::TypeSystemClang &ast); @@ -44,32 +46,33 @@ class DWARFASTParserClang : public DWARFASTParser { // DWARFASTParser interface. 
lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, bool *type_is_new_ptr) override; - lldb_private::ConstString - ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; + lldb_private::ConstString ConstructDemangledNameFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + CompleteTypeFromDWARF(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; lldb_private::CompilerDecl - GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + GetDeclForUIDFromDWARF(const lldb_plugin::dwarf::DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext - GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -85,9 +88,9 @@ class DWARFASTParserClang : public DWARFASTParser { /// DWARFFormValue with the bit width of the given integer type. /// Returns an error if the value in the DWARFFormValue does not fit /// into the given integer type or the integer type isn't supported. - llvm::Expected - ExtractIntFromFormValue(const lldb_private::CompilerType &int_type, - const DWARFFormValue &form_value) const; + llvm::Expected ExtractIntFromFormValue( + const lldb_private::CompilerType &int_type, + const lldb_plugin::dwarf::DWARFFormValue &form_value) const; /// Returns the template parameters of a class DWARFDIE as a string. /// @@ -100,7 +103,7 @@ class DWARFASTParserClang : public DWARFASTParser { /// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. lldb_private::ConstString - GetDIEClassTemplateParams(const DWARFDIE &die) override; + GetDIEClassTemplateParams(const lldb_plugin::dwarf::DWARFDIE &die) override; protected: /// Protected typedefs and members. 
@@ -108,14 +111,17 @@ class DWARFASTParserClang : public DWARFASTParser { class DelayedAddObjCClassProperty; typedef std::vector DelayedPropertyList; - typedef llvm::DenseMap + typedef llvm::DenseMap DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; - typedef llvm::DenseMap DIEToModuleMap; - typedef llvm::DenseMap + typedef llvm::DenseMap DIEToDeclMap; lldb_private::TypeSystemClang &m_ast; @@ -126,11 +132,13 @@ class DWARFASTParserClang : public DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); + clang::DeclContext * + GetDeclContextForBlock(const lldb_plugin::dwarf::DWARFDIE &die); - clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); + clang::BlockDecl *ResolveBlockDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveNamespaceDIE(const lldb_plugin::dwarf::DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -141,31 +149,34 @@ class DWARFASTParserClang : public DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. - clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveImportedDeclarationDIE(const lldb_plugin::dwarf::DWARFDIE &die); - bool ParseTemplateDIE(const DWARFDIE &die, + bool ParseTemplateDIE(const lldb_plugin::dwarf::DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const DWARFDIE &parent_die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); + std::string + GetCPlusPlusQualifiedName(const lldb_plugin::dwarf::DWARFDIE &die); bool ParseChildMembers( - const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, + const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, + std::vector &member_function_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const DWARFDIE &parent_die, bool skip_artificial, - bool &is_static, bool &is_variadic, + const lldb_plugin::dwarf::DWARFDIE &parent_die, + bool skip_artificial, bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, @@ -173,33 +184,40 @@ class DWARFASTParserClang : public DWARFASTParser { size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, bool is_signed, uint32_t enumerator_byte_size, - const DWARFDIE &parent_die); + const lldb_plugin::dwarf::DWARFDIE &parent_die); /// Parse a structure, class, or union type DIE. 
lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb_private::Type *GetTypeForDIE(const DWARFDIE &die); + lldb_private::Type *GetTypeForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); + clang::Decl *GetClangDeclForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, - DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextContainingDIE(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_plugin::dwarf::DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID + GetOwningClangModule(const lldb_plugin::dwarf::DWARFDIE &die); - bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, - const DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes( + const lldb_plugin::dwarf::DWARFDIE &src_class_die, + const lldb_plugin::dwarf::DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetCachedClangDeclContextForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, + const lldb_plugin::dwarf::DWARFDIE &die); - void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); + void LinkDeclToDIE(clang::Decl *decl, + const lldb_plugin::dwarf::DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. @@ -207,16 +225,17 @@ class DWARFASTParserClang : public DWARFASTParser { /// Returns \p type_sp. lldb::TypeSP UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); + const lldb_plugin::dwarf::DWARFDIE &die, + lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + lldb::ModuleSP GetModuleForType(const lldb_plugin::dwarf::DWARFDIE &die); private: struct FieldInfo { @@ -268,32 +287,37 @@ class DWARFASTParserClang : public DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. 
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, + void ParseObjCProperty(const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, DelayedPropertyList &delayed_properties); void - ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, + ParseSingleMember(const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, FieldInfo &last_field_info); - bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteRecordType(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteEnumType(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const DWARFDIE &die, + lldb::TypeSP ParseArrayType(const lldb_plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, + lldb::TypeSP ParsePointerToMemberType(const lldb_plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. @@ -311,7 +335,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. 
void ParseInheritance( - const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -328,7 +353,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, + ParseRustVariantPart(lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -338,7 +364,7 @@ class DWARFASTParserClang : public DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + explicit ParsedDWARFTypeAttributes(const lldb_plugin::dwarf::DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -355,12 +381,12 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; lldb_private::ConstString name; lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; + lldb_plugin::dwarf::DWARFDIE object_pointer; + lldb_plugin::dwarf::DWARFFormValue abstract_origin; + lldb_plugin::dwarf::DWARFFormValue containing_type; + lldb_plugin::dwarf::DWARFFormValue signature; + lldb_plugin::dwarf::DWARFFormValue specification; + lldb_plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional byte_size; size_t calling_convention = llvm::dwarf::DW_CC_normal; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp index 00b56537ae2b5fe..48bd62012afb393 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp @@ -11,6 +11,7 @@ #include "DWARFDebugInfo.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; DWARFAttributes::DWARFAttributes() : m_infos() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index 90e12fa024936d6..d0f152608dde415 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; class DWARFAttribute { @@ -31,6 +32,7 @@ class DWARFAttribute { form = m_form; val = m_value; } + protected: dw_attr_t m_attr; dw_form_t m_form; @@ -72,5 +74,6 @@ class DWARFAttributes { typedef llvm::SmallVector collection; collection m_infos; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp index 37a917c3a7661eb..9ab7f0651d93d1f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp @@ 
-18,6 +18,7 @@ #include using namespace lldb_private; +using namespace lldb_plugin::dwarf; std::optional DWARFBaseDIE::GetDIERef() const { if (!IsValid()) @@ -35,7 +36,7 @@ dw_tag_t DWARFBaseDIE::Tag() const { } const char *DWARFBaseDIE::GetTagAsCString() const { - return lldb_private::DW_TAG_value_to_name(Tag()); + return DW_TAG_value_to_name(Tag()); } const char *DWARFBaseDIE::GetAttributeValueAsString(const dw_attr_t attr, @@ -120,6 +121,7 @@ DWARFAttributes DWARFBaseDIE::GetAttributes(Recurse recurse) const { return DWARFAttributes(); } +namespace lldb_plugin::dwarf { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return lhs.GetDIE() == rhs.GetDIE() && lhs.GetCU() == rhs.GetCU(); } @@ -127,6 +129,7 @@ bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return !(lhs == rhs); } +} // namespace lldb_plugin::dwarf const DWARFDataExtractor &DWARFBaseDIE::GetData() const { // Clients must check if this DIE is valid before calling this function. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h index 8bcf807ad163a60..f7948ae13716e91 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h @@ -15,6 +15,7 @@ #include "llvm/Support/Error.h" #include +namespace lldb_plugin::dwarf { class DIERef; class DWARFASTParser; class DWARFAttributes; @@ -124,5 +125,6 @@ class DWARFBaseDIE { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFBASEDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp index f839a59bf6c390b..6f18c3ddf605f1d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp @@ -16,6 +16,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void DWARFCompileUnit::Dump(Stream *s) const { s->Format( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index ab3017ba0ffcbca..644a2914362776b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -12,6 +12,7 @@ #include "DWARFUnit.h" #include "llvm/Support/Error.h" +namespace lldb_plugin::dwarf { class DWARFCompileUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override; @@ -36,5 +37,6 @@ class DWARFCompileUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFCOMPILEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index f72dad88e157592..d7979a43e46e8a2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; static DWARFDataExtractor LoadSection(SectionList *section_list, SectionType section_type) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h index 7df776b5f514155..cabbb0bffbf6616 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h @@ -16,16 +16,16 @@ #include #include -namespace lldb_private { +namespace lldb_plugin::dwarf { class DWARFContext { private: - SectionList *m_main_section_list; - SectionList *m_dwo_section_list; + lldb_private::SectionList *m_main_section_list; + lldb_private::SectionList *m_dwo_section_list; mutable std::unique_ptr m_llvm_context; struct SectionData { llvm::once_flag flag; - DWARFDataExtractor data; + lldb_private::DWARFDataExtractor data; }; SectionData m_data_debug_abbrev; @@ -45,39 +45,39 @@ class DWARFContext { SectionData m_data_debug_tu_index; SectionData m_data_debug_types; - const DWARFDataExtractor & + const lldb_private::DWARFDataExtractor & LoadOrGetSection(std::optional main_section_type, std::optional dwo_section_type, SectionData &data); - const DWARFDataExtractor &getOrLoadCuIndexData(); - const DWARFDataExtractor &getOrLoadTuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadCuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadTuIndexData(); public: - explicit DWARFContext(SectionList *main_section_list, - SectionList *dwo_section_list) + explicit DWARFContext(lldb_private::SectionList *main_section_list, + lldb_private::SectionList *dwo_section_list) : m_main_section_list(main_section_list), m_dwo_section_list(dwo_section_list) {} - const DWARFDataExtractor &getOrLoadAbbrevData(); - const DWARFDataExtractor &getOrLoadAddrData(); - const DWARFDataExtractor &getOrLoadArangesData(); - const DWARFDataExtractor &getOrLoadDebugInfoData(); - const DWARFDataExtractor &getOrLoadLineData(); - const DWARFDataExtractor &getOrLoadLineStrData(); - const DWARFDataExtractor &getOrLoadLocData(); - const DWARFDataExtractor &getOrLoadLocListsData(); - const DWARFDataExtractor &getOrLoadMacroData(); - const DWARFDataExtractor &getOrLoadRangesData(); - const DWARFDataExtractor &getOrLoadRngListsData(); - const DWARFDataExtractor &getOrLoadStrData(); - const DWARFDataExtractor &getOrLoadStrOffsetsData(); - const DWARFDataExtractor &getOrLoadDebugTypesData(); + const lldb_private::DWARFDataExtractor &getOrLoadAbbrevData(); + const lldb_private::DWARFDataExtractor &getOrLoadAddrData(); + const lldb_private::DWARFDataExtractor &getOrLoadArangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugInfoData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadMacroData(); + const lldb_private::DWARFDataExtractor &getOrLoadRangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadRngListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrOffsetsData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugTypesData(); bool isDwo() { return m_dwo_section_list != nullptr; } llvm::DWARFContext &GetAsLLVM(); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index b31c5dcac91851d..62ef0eb356b0bc7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; namespace { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 031ea26ad405094..3e4a9ff4d446638 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/iterator_range.h" +namespace lldb_plugin::dwarf { class DWARFDIE : public DWARFBaseDIE { public: class child_iterator; @@ -126,5 +127,6 @@ class DWARFDIE::child_iterator return *this; } }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h index b9526b079c1e9da..41b8e9ad0217b69 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h @@ -33,6 +33,6 @@ class DWARFDataExtractor : public DataExtractor { llvm::DWARFDataExtractor GetAsLLVMDWARF() const; llvm::DataExtractor GetAsLLVM() const; }; -} +} // namespace lldb_private #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDATAEXTRACTOR_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp index f3c2755c5a527cc..0882f8add83062b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // DWARFDebugAbbrev constructor DWARFDebugAbbrev::DWARFDebugAbbrev(const DWARFDataExtractor &data) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h index d2fade0934c8a88..0db4a79bf5b1dd7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h @@ -17,6 +17,7 @@ #include +namespace lldb_plugin::dwarf { using DWARFAbbreviationDeclaration = llvm::DWARFAbbreviationDeclaration; using DWARFAbbreviationDeclarationSet = llvm::DWARFAbbreviationDeclarationSet; @@ -51,5 +52,6 @@ class DWARFDebugAbbrev { mutable DWARFAbbreviationDeclarationCollMapConstIter m_prev_abbr_offset_pos; mutable std::optional m_data; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGABBREV_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp index 03cbfd28ae7413a..61f1f5f1aa8baec 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -13,6 +13,7 @@ #include using namespace lldb_private; +using namespace lldb_plugin::dwarf; DWARFDebugArangeSet::DWARFDebugArangeSet() : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h index 3c8633eaa3cce85..b1aaade00fbb126 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_plugin::dwarf { class 
DWARFDebugArangeSet { public: struct Header { @@ -62,5 +63,6 @@ class DWARFDebugArangeSet { Header m_header; DescriptorColl m_arange_descriptors; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index b38dd2b88c9d0b0..a5beafe593c2c1e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // Constructor DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h index 5ff37e400c88403..e0a852faa40b80c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h @@ -13,6 +13,7 @@ #include "lldb/Utility/RangeMap.h" #include "llvm/Support/Error.h" +namespace lldb_plugin::dwarf { class DWARFDebugAranges { protected: typedef lldb_private::RangeDataVector @@ -50,5 +51,6 @@ class DWARFDebugAranges { protected: RangeToDIE m_aranges; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp index 9a33d6338b87d3e..54609a1b77fc6cb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp @@ -27,10 +27,10 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // Constructor -DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context) +DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context) : m_dwarf(dwarf), m_context(context), m_units(), m_cu_aranges_up() {} const DWARFDebugAranges &DWARFDebugInfo::GetCompileUnitAranges() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index c990ac9fbe58310..42ab0b55ba41d28 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -19,20 +19,17 @@ #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" -namespace lldb_private { +namespace lldb_plugin::dwarf { class DWARFContext; -} class DWARFDebugInfo { public: - typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, - DWARFUnit *cu, + typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, DWARFUnit *cu, DWARFDebugInfoEntry *die, const dw_offset_t next_offset, const uint32_t depth, void *userData); - explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context); + explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context); size_t GetNumUnits(); DWARFUnit *GetUnitAtIndex(size_t idx); @@ -58,7 +55,7 @@ class DWARFDebugInfo { typedef std::vector UnitColl; SymbolFileDWARF &m_dwarf; - lldb_private::DWARFContext &m_context; + DWARFContext &m_context; llvm::once_flag m_units_once_flag; UnitColl m_units; @@ -80,5 +77,6 @@ class DWARFDebugInfo { DWARFDebugInfo(const DWARFDebugInfo &) = delete; const DWARFDebugInfo &operator=(const DWARFDebugInfo &) = delete; }; +} // namespace lldb_plugin::dwarf #endif // 
LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index a08637aef066978..2175caa76a34d38 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -34,6 +34,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; extern int g_verbose; // Extract a debug info entry for a given DWARFUnit from the data diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index c2ea40065232e72..27a52c20e6c0f9a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -23,6 +23,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +namespace lldb_plugin::dwarf { class DWARFDeclContext; #define DIE_SIBLING_IDX_BITSIZE 31 @@ -191,5 +192,6 @@ class DWARFDebugInfoEntry { void GetAttributes(DWARFUnit *cu, DWARFAttributes &attrs, Recurse recurse, uint32_t curr_depth) const; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFOENTRY_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp index 19c6448c4e74a06..cde5730d51e33fa 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp @@ -15,6 +15,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; DWARFDebugMacroHeader DWARFDebugMacroHeader::ParseHeader(const DWARFDataExtractor &debug_macro_data, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h index cbf762458331bcd..fd506b10b26b3da 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h @@ -17,11 +17,10 @@ #include "lldb/lldb-types.h" namespace lldb_private { - class DWARFDataExtractor; +} -} // namespace lldb_private - +namespace lldb_plugin::dwarf { class SymbolFileDWARF; class DWARFDebugMacroHeader { @@ -57,5 +56,6 @@ class DWARFDebugMacroEntry { SymbolFileDWARF *sym_file_dwarf, lldb_private::DebugMacrosSP &debug_macros_sp); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGMACRO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp index 0b5bb23a4981f89..e37d22f07337763 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp @@ -11,6 +11,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" using namespace lldb_private; +using namespace lldb_plugin::dwarf; DWARFDebugRanges::DWARFDebugRanges() : m_range_map() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h index 2e06cd5daf6f32d..78724a1d19223e6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h @@ -12,21 +12,21 @@ #include "lldb/Core/dwarf.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; -namespace lldb_private { class DWARFContext; -} class 
DWARFDebugRanges { public: DWARFDebugRanges(); - void Extract(lldb_private::DWARFContext &context); + void Extract(DWARFContext &context); DWARFRangeList FindRanges(const DWARFUnit *cu, dw_offset_t debug_ranges_offset) const; protected: std::map m_range_map; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGRANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp index 393de0038e651f3..a20dfef6bf89921 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp @@ -9,6 +9,7 @@ #include "DWARFDeclContext.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; const char *DWARFDeclContext::GetQualifiedName() const { if (m_qualified_name.empty()) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h index 13e3dfb70c0cc80..03ed4e898d566fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h @@ -16,6 +16,7 @@ #include #include +namespace lldb_plugin::dwarf { // DWARFDeclContext // // A class that represents a declaration context all the way down to a @@ -82,5 +83,6 @@ class DWARFDeclContext { collection m_entries; mutable std::string m_qualified_name; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDECLCONTEXT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp index 4e99a295ce50f7d..e30253e18d49cdf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp @@ -12,7 +12,7 @@ #include #include -namespace lldb_private { +namespace lldb_plugin::dwarf { const char *DW_TAG_value_to_name(uint32_t val) { static char invalid[100]; @@ -88,4 +88,4 @@ const char *DW_LNS_value_to_name(uint32_t val) { return llvmstr.data(); } -} // namespace lldb_private +} // namespace lldb_plugin::dwarf diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h index 2afdbb47381a9cd..dd41724149f9d90 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h @@ -12,7 +12,7 @@ #include "lldb/Core/dwarf.h" #include -namespace lldb_private { +namespace lldb_plugin::dwarf { typedef uint32_t DRC_class; // Holds DRC_* class bitfields @@ -30,6 +30,6 @@ const char *DW_LANG_value_to_name(uint32_t val); const char *DW_LNS_value_to_name(uint32_t val); -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEFINES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index 6ca17dcf47ff7ba..7c9f8073a3a01ae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -22,6 +22,7 @@ class DWARFUnit; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index 2a8843c1a0d45df..532afd94ef72c5f 100644 --- 
a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFUnit; class SymbolFileDWARF; class DWARFDIE; @@ -84,7 +85,8 @@ class DWARFFormValue { // It may be different from compile unit where m_value refers to. const DWARFUnit *m_unit = nullptr; // Unit for this form dw_form_t m_form = dw_form_t(0); // Form for this value - ValueType m_value; // Contains all data for the form + ValueType m_value; // Contains all data for the form }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index 779b52481b856d8..791bd2d88696c26 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -17,6 +17,7 @@ using namespace lldb_private; using namespace lldb; +using namespace lldb_plugin::dwarf; DWARFIndex::~DWARFIndex() = default; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 13fe96dae2aa1d6..4fd952ddd8fc148 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -17,13 +17,13 @@ #include "lldb/Core/Module.h" #include "lldb/Target/Statistics.h" +namespace lldb_plugin::dwarf { class DWARFDeclContext; class DWARFDIE; -namespace lldb_private { class DWARFIndex { public: - DWARFIndex(Module &module) : m_module(module) {} + DWARFIndex(lldb_private::Module &module) : m_module(module) {} virtual ~DWARFIndex(); virtual void Preload() = 0; @@ -32,53 +32,56 @@ class DWARFIndex { /// (e.g., to only retrieve variables from a given context) should be done by /// the consumer. virtual void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) = 0; virtual void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) = 0; /// \a cu must be the skeleton unit if possible, not GetNonSkeletonUnit(). 
virtual void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) = 0; virtual void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) = 0; virtual void - GetCompleteObjCClass(ConstString class_name, bool must_be_implementation, + GetCompleteObjCClass(lldb_private::ConstString class_name, + bool must_be_implementation, llvm::function_ref callback) = 0; - virtual void GetTypes(ConstString name, + virtual void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) = 0; virtual void - GetNamespaces(ConstString name, + GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void - GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, + SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) = 0; virtual void - GetFunctions(const RegularExpression ®ex, + GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) = 0; - virtual void Dump(Stream &s) = 0; + virtual void Dump(lldb_private::Stream &s) = 0; - StatsDuration::Duration GetIndexTime() { return m_index_time; } + lldb_private::StatsDuration::Duration GetIndexTime() { return m_index_time; } protected: - Module &m_module; - StatsDuration m_index_time; + lldb_private::Module &m_module; + lldb_private::StatsDuration m_index_time; /// Helper function implementing common logic for processing function dies. If /// the function given by "ref" matches search criteria given by /// "parent_decl_ctx" and "name_type_mask", it is inserted into the "dies" /// vector. 
- bool ProcessFunctionDIE(const Module::LookupInfo &lookup_info, DIERef ref, - SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, - llvm::function_ref callback); + bool + ProcessFunctionDIE(const lldb_private::Module::LookupInfo &lookup_info, + DIERef ref, SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback); class DIERefCallbackImpl { public: @@ -102,6 +105,6 @@ class DWARFIndex { void ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const; }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp index 87af7177ca95ee9..cdacd8425bb8ed4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void DWARFTypeUnit::Dump(Stream *s) const { s->Format("{0:x16}: Type Unit: length = {1:x8}, version = {2:x4}, " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 5e4d48ab285a9d6..0f7626e3fd2ed2a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -12,6 +12,7 @@ #include "DWARFUnit.h" #include "llvm/Support/Error.h" +namespace lldb_plugin::dwarf { class DWARFTypeUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {} @@ -33,5 +34,6 @@ class DWARFTypeUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFTYPEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 749ffcb094ecfd9..bf9f7a6407c3533 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -27,6 +27,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; extern int g_verbose; @@ -877,8 +878,7 @@ const DWARFDebugAranges &DWARFUnit::GetFunctionAranges() { llvm::Expected DWARFUnitHeader::extract(const DWARFDataExtractor &data, - DIERef::Section section, - lldb_private::DWARFContext &context, + DIERef::Section section, DWARFContext &context, lldb::offset_t *offset_ptr) { DWARFUnitHeader header; header.m_offset = *offset_ptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index bc55b093e894edd..2fc362917dba8d7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -18,6 +18,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFUnit; class DWARFCompileUnit; class NameToDIE; @@ -77,8 +78,7 @@ class DWARFUnitHeader { static llvm::Expected extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &dwarf_context, - lldb::offset_t *offset_ptr); + DWARFContext &dwarf_context, lldb::offset_t *offset_ptr); }; class DWARFUnit : public lldb_private::UserID { @@ -101,6 +101,7 @@ class DWARFUnit : public lldb_private::UserID { class ScopedExtractDIEs { DWARFUnit *m_cu; + public: bool m_clear_dies = false; 
ScopedExtractDIEs(DWARFUnit &cu); @@ -224,7 +225,9 @@ class DWARFUnit : public lldb_private::UserID { uint8_t GetUnitType() const { return m_header.GetUnitType(); } bool IsTypeUnit() const { return m_header.IsTypeUnit(); } /// Note that this check only works for DWARF5+. - bool IsSkeletonUnit() const { return GetUnitType() == llvm::dwarf::DW_UT_skeleton; } + bool IsSkeletonUnit() const { + return GetUnitType() == llvm::dwarf::DW_UT_skeleton; + } std::optional GetStringOffsetSectionItem(uint32_t index) const; @@ -269,7 +272,6 @@ class DWARFUnit : public lldb_private::UserID { /// True if any DIEs match any tag in \a tags, false otherwise. bool HasAny(llvm::ArrayRef tags); - /// Get the fission .dwo file specific error for this compile unit. /// /// The skeleton compile unit only can have a DWO error. Any other type @@ -348,9 +350,9 @@ class DWARFUnit : public lldb_private::UserID { lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate; std::optional m_comp_dir; std::optional m_file_spec; - std::optional m_addr_base; ///< Value of DW_AT_addr_base. - dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. - dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. + std::optional m_addr_base; ///< Value of DW_AT_addr_base. + dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. + dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. std::optional m_gnu_addr_base; std::optional m_gnu_ranges_base; @@ -387,5 +389,6 @@ class DWARFUnit : public lldb_private::UserID { DWARFUnit(const DWARFUnit &) = delete; const DWARFUnit &operator=(const DWARFUnit &) = delete; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index af2d6c554140bc0..748267e3c8bd010 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; llvm::Expected> DebugNamesDWARFIndex::Create(Module &module, DWARFDataExtractor debug_names, @@ -227,7 +228,7 @@ void DebugNamesDWARFIndex::GetNamespaces( ConstString name, llvm::function_ref callback) { for (const DebugNames::Entry &entry : m_debug_names_up->equal_range(name.GetStringRef())) { - dwarf::Tag entry_tag = entry.tag(); + lldb_private::dwarf::Tag entry_tag = entry.tag(); if (entry_tag == DW_TAG_namespace || entry_tag == DW_TAG_imported_declaration) { if (!ProcessEntry(entry, callback)) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index abbd700f1603feb..938b3e50ec0fba5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -17,50 +17,51 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include -namespace lldb_private { +namespace lldb_plugin::dwarf { class DebugNamesDWARFIndex : public DWARFIndex { public: static llvm::Expected> - Create(Module &module, DWARFDataExtractor debug_names, - DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor debug_names, + lldb_private::DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); void Preload() override { 
m_fallback.Preload(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override {} void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: - DebugNamesDWARFIndex(Module &module, + DebugNamesDWARFIndex(lldb_private::Module &module, std::unique_ptr debug_names_up, - DWARFDataExtractor debug_names_data, - DWARFDataExtractor debug_str_data, + lldb_private::DWARFDataExtractor debug_names_data, + lldb_private::DWARFDataExtractor debug_str_data, SymbolFileDWARF &dwarf) : DWARFIndex(module), m_debug_info(dwarf.DebugInfo()), m_debug_names_data(debug_names_data), m_debug_str_data(debug_str_data), @@ -71,8 +72,8 @@ class DebugNamesDWARFIndex : public DWARFIndex { // LLVM DWARFDebugNames will hold a non-owning reference to this data, so keep // track of the ownership here. 
- DWARFDataExtractor m_debug_names_data; - DWARFDataExtractor m_debug_str_data; + lldb_private::DWARFDataExtractor m_debug_names_data; + lldb_private::DWARFDataExtractor m_debug_str_data; using DebugNames = llvm::DWARFDebugNames; std::unique_ptr m_debug_names_up; @@ -89,6 +90,6 @@ class DebugNamesDWARFIndex : public DWARFIndex { static llvm::DenseSet GetUnits(const DebugNames &debug_names); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DEBUGNAMESDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 90f18c96afa230e..d4a4cbcc0f5326f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -28,6 +28,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; void ManualDWARFIndex::Index() { if (m_indexed) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index d95cf501face8e4..cd5527459b86d22 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -13,13 +13,13 @@ #include "Plugins/SymbolFile/DWARF/NameToDIE.h" #include "llvm/ADT/DenseSet.h" +namespace lldb_plugin::dwarf { class DWARFDebugInfo; class SymbolFileDWARFDwo; -namespace lldb_private { class ManualDWARFIndex : public DWARFIndex { public: - ManualDWARFIndex(Module &module, SymbolFileDWARF &dwarf, + ManualDWARFIndex(lldb_private::Module &module, SymbolFileDWARF &dwarf, llvm::DenseSet units_to_avoid = {}) : DWARFIndex(module), m_dwarf(&dwarf), m_units_to_avoid(std::move(units_to_avoid)) {} @@ -27,33 +27,33 @@ class ManualDWARFIndex : public DWARFIndex { void Preload() override { Index(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &unit, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; // Make IndexSet public so 
we can unit test the encoding and decoding logic. struct IndexSet { @@ -65,8 +65,9 @@ class ManualDWARFIndex : public DWARFIndex { NameToDIE globals; NameToDIE types; NameToDIE namespaces; - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); - void Encode(DataEncoder &encoder) const; + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr); + void Encode(lldb_private::DataEncoder &encoder) const; bool operator==(const IndexSet &rhs) const { return function_basenames == rhs.function_basenames && function_fullnames == rhs.function_fullnames && @@ -94,8 +95,8 @@ class ManualDWARFIndex : public DWARFIndex { /// All strings in cache files are put into string tables for efficiency /// and cache file size reduction. Strings are stored as uint32_t string /// table offsets in the cache data. - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, - bool &signature_mismatch); + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr, bool &signature_mismatch); /// Encode this object into a data encoder object. /// @@ -112,7 +113,7 @@ class ManualDWARFIndex : public DWARFIndex { /// \return /// True if the symbol table's object file can generate a valid signature /// and all data for the symbol table was encoded, false otherwise. - bool Encode(DataEncoder &encoder) const; + bool Encode(lldb_private::DataEncoder &encoder) const; /// Get the cache key string for this symbol table. /// @@ -173,6 +174,6 @@ class ManualDWARFIndex : public DWARFIndex { IndexSet m_set; bool m_indexed = false; }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 89e628f5eaf1c47..555c9ca2275a007 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -20,6 +20,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void NameToDIE::Finalize() { m_map.Sort(std::less()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 61df1a628ab5913..4617abc4fca30ef 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -16,6 +16,7 @@ #include "lldb/Core/dwarf.h" #include "lldb/lldb-defines.h" +namespace lldb_plugin::dwarf { class DWARFUnit; class NameToDIE { @@ -45,8 +46,7 @@ class NameToDIE { void ForEach(std::function const - &callback) const; + const DIERef &die_ref)> const &callback) const; /// Decode a serialized version of this object from data. 
/// @@ -89,5 +89,6 @@ class NameToDIE { protected: lldb_private::UniqueCStringMap m_map; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index aae481e2ae74177..f8679714e013700 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -97,6 +97,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; LLDB_PLUGIN_DEFINE(SymbolFileDWARF) @@ -136,9 +137,8 @@ static PluginProperties &GetGlobalPluginProperties() { } static const llvm::DWARFDebugLine::LineTable * -ParseLLVMLineTable(lldb_private::DWARFContext &context, - llvm::DWARFDebugLine &line, dw_offset_t line_offset, - dw_offset_t unit_offset) { +ParseLLVMLineTable(DWARFContext &context, llvm::DWARFDebugLine &line, + dw_offset_t line_offset, dw_offset_t unit_offset) { Log *log = GetLog(DWARFLog::DebugInfo); llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVMDWARF(); @@ -159,7 +159,7 @@ ParseLLVMLineTable(lldb_private::DWARFContext &context, return *line_table; } -static bool ParseLLVMLineTablePrologue(lldb_private::DWARFContext &context, +static bool ParseLLVMLineTablePrologue(DWARFContext &context, llvm::DWARFDebugLine::Prologue &prologue, dw_offset_t line_offset, dw_offset_t unit_offset) { @@ -2426,7 +2426,7 @@ bool SymbolFileDWARF::DIEInDeclContext(const CompilerDeclContext &decl_ctx, // ...But if we are only checking root decl contexts, confirm that the // 'die' is a top-level context. if (only_root_namespaces) - return die.GetParent().Tag() == dwarf::DW_TAG_compile_unit; + return die.GetParent().Tag() == llvm::dwarf::DW_TAG_compile_unit; return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 191a5abcf265abd..39b471ba52fce02 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -38,6 +38,9 @@ #include "DWARFIndex.h" #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace lldb_plugin::dwarf { // Forward Declarations for this DWARF plugin class DebugMapModule; class DWARFCompileUnit; @@ -75,7 +78,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; // Static Functions static void Initialize(); @@ -282,7 +285,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { void DumpClangAST(lldb_private::Stream &s) override; - lldb_private::DWARFContext &GetDWARFContext() { return m_context; } + DWARFContext &GetDWARFContext() { return m_context; } const std::shared_ptr &GetDwpSymbolFile(); @@ -531,7 +534,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { llvm::once_flag m_dwp_symfile_once_flag; std::shared_ptr m_dwp_symfile; - lldb_private::DWARFContext m_context; + DWARFContext m_context; llvm::once_flag m_info_once_flag; std::unique_ptr m_info; @@ -544,7 +547,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { DebugMacrosMap m_debug_macros_map; ExternalTypeModuleMap m_external_type_modules; - std::unique_ptr m_index; + std::unique_ptr m_index; bool m_fetched_external_modules : 1; lldb_private::LazyBool 
m_supports_DW_AT_APPLE_objc_complete_type; @@ -575,5 +578,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// an index that identifies the .DWO or .o file. std::optional m_file_index; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARF_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index eadedd32e1a4aaf..afd3a9a5c27b08c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -42,6 +42,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; char SymbolFileDWARFDebugMap::ID; @@ -166,6 +167,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap( return file_range_map; } +namespace lldb_plugin::dwarf { class DebugMapModule : public Module { public: DebugMapModule(const ModuleSP &exe_module_sp, uint32_t cu_idx, @@ -222,6 +224,7 @@ class DebugMapModule : public Module { ModuleWP m_exe_module_wp; const uint32_t m_cu_idx; }; +} // namespace lldb_plugin::dwarf void SymbolFileDWARFDebugMap::Initialize() { PluginManager::RegisterPlugin(GetPluginNameStatic(), diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index 881fd4c45ff05a0..d9ad4f0ac077c29 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -20,6 +20,9 @@ #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace lldb_plugin::dwarf { class SymbolFileDWARF; class DWARFCompileUnit; class DWARFDebugAranges; @@ -161,7 +164,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { enum { kHaveInitializedOSOs = (1 << 0), kNumFlags }; friend class DebugMapModule; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; friend class DWARFCompileUnit; friend class SymbolFileDWARF; struct OSOInfo { @@ -296,9 +299,10 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { bool Supports_DW_AT_APPLE_objc_complete_type(SymbolFileDWARF *skip_dwarf_oso); - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation); + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation); UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; @@ -403,5 +407,6 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { size_t AddOSOARanges(SymbolFileDWARF *dwarf2Data, DWARFDebugAranges *debug_aranges); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDEBUGMAP_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 78c3c19684e116d..3d92a9c384fb491 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -21,6 +21,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; char SymbolFileDWARFDwo::ID; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index 
e98ea49d939baf0..22aba785670e228 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -12,6 +12,7 @@ #include "SymbolFileDWARF.h" #include +namespace lldb_plugin::dwarf { class SymbolFileDWARFDwo : public SymbolFileDWARF { /// LLVM RTTI support. static char ID; @@ -65,9 +66,10 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { lldb::TypeSP FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) override; - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation) override; + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation) override; SymbolFileDWARF &GetBaseSymbolFile() const { return m_base_symbol_file; } @@ -77,5 +79,6 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { SymbolFileDWARF &m_base_symbol_file; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDWO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 22a921cf61389bc..7378b2502dc9358 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -11,6 +11,7 @@ #include "lldb/Core/Declaration.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, const lldb_private::Declaration &decl, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index 0947d1e581c5237..6f8ef5d9049e3e9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ -16,6 +16,7 @@ #include "DWARFDIE.h" #include "lldb/Core/Declaration.h" +namespace lldb_plugin::dwarf { class UniqueDWARFASTType { public: // Constructors and Destructors @@ -74,8 +75,7 @@ class UniqueDWARFASTTypeMap { ~UniqueDWARFASTTypeMap() = default; - void Insert(lldb_private::ConstString name, - const UniqueDWARFASTType &entry) { + void Insert(lldb_private::ConstString name, const UniqueDWARFASTType &entry) { m_collection[name.GetCString()].Append(entry); } @@ -95,5 +95,6 @@ class UniqueDWARFASTTypeMap { typedef llvm::DenseMap collection; collection m_collection; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_UNIQUEDWARFASTTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 69cff0f35ae4ab2..9ca735e5820db57 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -86,6 +86,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; using namespace clang; using llvm::StringSwitch; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 0544de3cd33befb..b18a7b31cf7acc0 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -514,7 +514,7 @@ class TypeSystemClang : public TypeSystem { size_t bit_size); // TypeSystem methods - DWARFASTParser *GetDWARFParser() 
override;
+  lldb_plugin::dwarf::DWARFASTParser *GetDWARFParser() override;
   PDBASTParser *GetPDBParser() override;
   npdb::PdbAstBuilder *GetNativePDBParser() override;

From lldb-commits at lists.llvm.org  Tue Oct  3 13:00:49 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Tue, 03 Oct 2023 13:00:49 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150)
In-Reply-To: 
Message-ID: <651c72f1.170a0220.752ae.856d@mx.google.com>

https://github.com/walter-erquinigo ready_for_review https://github.com/llvm/llvm-project/pull/68150

From lldb-commits at lists.llvm.org  Tue Oct  3 13:04:50 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Tue, 03 Oct 2023 13:04:50 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150)
In-Reply-To: 
Message-ID: <651c73e2.170a0220.75305.5c07@mx.google.com>

https://github.com/walter-erquinigo updated https://github.com/llvm/llvm-project/pull/68150

>From 98c009c5f33b81d579fd11d11a660026f33836ae Mon Sep 17 00:00:00 2001
From: walter erquinigo
Date: Mon, 2 Oct 2023 16:56:16 -0400
Subject: [PATCH] [LLDB][NFC] Create a namespace for the DWARF plugin

As a follow-up to https://github.com/llvm/llvm-project/pull/67851, I'm
defining a new namespace, `lldb_plugin::dwarf`, for the classes in the
Plugins/SymbolFile/DWARF folder. The change is strictly NFC and helped
me export the necessary symbols for my out-of-tree language plugin.

The only two classes I didn't move are DWARFDataExtractor, because it
is explicitly exported as part of lldb_private in `lldb-forward.h`, and
DWARFASTParserClang, because it shouldn't live in the same namespace as
the generic, language-agnostic DWARF parser, though I'm okay with
changing that. In any case, even if I didn't need this for my work,
adding this namespace could be considered good practice.
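To make the effect on downstream code concrete, here is a rough sketch
of a hypothetical out-of-tree consumer after this change. `DumpDIETag`,
its body, and the include path are illustrative assumptions, not part
of this patch; `IsValid()` and `GetTagAsCString()` are real members of
DWARFBaseDIE, which the patch moves into the new namespace.

// Hypothetical out-of-tree code; the include path is an assumption
// about how a downstream plugin reaches these headers.
#include "Plugins/SymbolFile/DWARF/DWARFDIE.h"

#include <cstdio>

// Before this patch, `DWARFDIE` resolved without qualification because
// the plugin types sat in the global namespace. Now the type must be
// qualified explicitly, as here, or imported with
// `using namespace lldb_plugin::dwarf;`, which is what the .cpp files
// in this patch do.
void DumpDIETag(const lldb_plugin::dwarf::DWARFDIE &die) {
  // IsValid() and GetTagAsCString() are inherited from DWARFBaseDIE,
  // which this patch also moves into lldb_plugin::dwarf.
  if (die.IsValid())
    std::printf("%s\n", die.GetTagAsCString());
}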
--- .../include/lldb/Expression/DWARFExpression.h | 24 ++- .../lldb/Expression/DWARFExpressionList.h | 9 +- lldb/include/lldb/Symbol/TypeSystem.h | 8 +- lldb/source/Expression/DWARFExpression.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.h | 35 ++-- .../Plugins/SymbolFile/DWARF/DIERef.cpp | 1 + lldb/source/Plugins/SymbolFile/DWARF/DIERef.h | 7 +- .../SymbolFile/DWARF/DWARFASTParser.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 5 +- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 2 + .../SymbolFile/DWARF/DWARFASTParserClang.h | 158 ++++++++++-------- .../SymbolFile/DWARF/DWARFAttribute.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFAttribute.h | 3 + .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp | 5 +- .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.h | 2 + .../SymbolFile/DWARF/DWARFCompileUnit.cpp | 1 + .../SymbolFile/DWARF/DWARFCompileUnit.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFContext.h | 48 +++--- .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFDIE.h | 2 + .../SymbolFile/DWARF/DWARFDataExtractor.h | 2 +- .../SymbolFile/DWARF/DWARFDebugArangeSet.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugArangeSet.h | 2 + .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugAranges.h | 2 + .../SymbolFile/DWARF/DWARFDebugInfo.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h | 12 +- .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugInfoEntry.h | 2 + .../SymbolFile/DWARF/DWARFDebugMacro.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugMacro.h | 6 +- .../SymbolFile/DWARF/DWARFDebugRanges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugRanges.h | 6 +- .../SymbolFile/DWARF/DWARFDeclContext.cpp | 1 + .../SymbolFile/DWARF/DWARFDeclContext.h | 2 + .../Plugins/SymbolFile/DWARF/DWARFDefines.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDefines.h | 4 +- .../SymbolFile/DWARF/DWARFFormValue.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFFormValue.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFIndex.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFIndex.h | 43 ++--- .../SymbolFile/DWARF/DWARFTypeUnit.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 21 ++- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 17 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 3 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 39 ++--- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/ManualDWARFIndex.h | 37 ++-- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/NameToDIE.h | 5 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 10 +- .../SymbolFile/DWARF/SymbolFileDWARF.h | 12 +- .../DWARF/SymbolFileDWARFDebugMap.cpp | 3 + .../DWARF/SymbolFileDWARFDebugMap.h | 13 +- .../SymbolFile/DWARF/SymbolFileDWARFDwo.cpp | 1 + .../SymbolFile/DWARF/SymbolFileDWARFDwo.h | 9 +- .../SymbolFile/DWARF/UniqueDWARFASTType.cpp | 1 + .../SymbolFile/DWARF/UniqueDWARFASTType.h | 5 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 1 + .../TypeSystem/Clang/TypeSystemClang.h | 2 +- 63 files changed, 364 insertions(+), 243 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 5e03f539a272cac..4ed3881eb513c99 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf { 
class DWARFUnit; +} // namespace lldb_plugin::dwarf namespace lldb_private { @@ -64,18 +66,21 @@ class DWARFExpression { /// \return /// The address specified by the operation, if the operation exists, or /// LLDB_INVALID_ADDRESS otherwise. - lldb::addr_t GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, - bool &error) const; + lldb::addr_t + GetLocation_DW_OP_addr(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + bool &error) const; - bool Update_DW_OP_addr(const DWARFUnit *dwarf_cu, lldb::addr_t file_addr); + bool Update_DW_OP_addr(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + lldb::addr_t file_addr); void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size, uint8_t addr_byte_size); - bool ContainsThreadLocalStorage(const DWARFUnit *dwarf_cu) const; + bool ContainsThreadLocalStorage( + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu) const; bool LinkThreadLocalStorage( - const DWARFUnit *dwarf_cu, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, std::function const &link_address_callback); @@ -128,15 +133,16 @@ class DWARFExpression { /// details of the failure are provided through it. static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, lldb::ModuleSP module_sp, const DataExtractor &opcodes, - const DWARFUnit *dwarf_cu, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_set, const Value *initial_value_ptr, const Value *object_address_ptr, Value &result, Status *error_ptr); - static bool ParseDWARFLocationList(const DWARFUnit *dwarf_cu, - const DataExtractor &data, - DWARFExpressionList *loc_list); + static bool + ParseDWARFLocationList(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + const DataExtractor &data, + DWARFExpressionList *loc_list); bool GetExpressionData(DataExtractor &data) const { data = m_data; diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h index c0939647056dcbf..c44ff71a7eef1bf 100644 --- a/lldb/include/lldb/Expression/DWARFExpressionList.h +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -13,7 +13,9 @@ #include "lldb/Utility/RangeMap.h" #include "lldb/lldb-private.h" +namespace lldb_plugin::dwarf { class DWARFUnit; +} // namespace lldb_plugin::dwarf namespace lldb_private { @@ -24,13 +26,14 @@ class DWARFExpressionList { public: DWARFExpressionList() = default; - DWARFExpressionList(lldb::ModuleSP module_sp, const DWARFUnit *dwarf_cu, + DWARFExpressionList(lldb::ModuleSP module_sp, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, lldb::addr_t func_file_addr) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu), m_func_file_addr(func_file_addr) {} DWARFExpressionList(lldb::ModuleSP module_sp, DWARFExpression expr, - const DWARFUnit *dwarf_cu) + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu) { AddExpression(0, LLDB_INVALID_ADDRESS, expr); } @@ -136,7 +139,7 @@ class DWARFExpressionList { /// The DWARF compile unit this expression belongs to. It is used to evaluate /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index, /// DW_OP_GNU_const_index) - const DWARFUnit *m_dwarf_cu = nullptr; + const lldb_plugin::dwarf::DWARFUnit *m_dwarf_cu = nullptr; // Function base file address. 
lldb::addr_t m_func_file_addr = LLDB_INVALID_ADDRESS; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index eb6e453e1aec0d0..4ef22a02d8c2d3f 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -28,8 +28,11 @@ #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/lldb-private.h" +namespace lldb_plugin::dwarf { class DWARFDIE; class DWARFASTParser; +} // namespace lldb_plugin::dwarf + class PDBASTParser; namespace lldb_private { @@ -93,7 +96,10 @@ class TypeSystem : public PluginInterface, /// removing all the TypeSystems from the TypeSystemMap. virtual void Finalize() {} - virtual DWARFASTParser *GetDWARFParser() { return nullptr; } + virtual lldb_plugin::dwarf::DWARFASTParser *GetDWARFParser() { + return nullptr; + } + virtual PDBASTParser *GetPDBParser() { return nullptr; } virtual npdb::PdbAstBuilder *GetNativePDBParser() { return nullptr; } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 93fcf0579be0b18..18528d78f1cf01b 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -45,6 +45,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; // DWARFExpression constructor DWARFExpression::DWARFExpression() : m_data() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 34fb98b5a9b690a..286fadfb2dcdaf6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; std::unique_ptr AppleDWARFIndex::Create( Module &module, DWARFDataExtractor apple_names, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h index 6b948e07989531e..ab7d4659e56cbe4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h @@ -12,15 +12,18 @@ #include "Plugins/SymbolFile/DWARF/DWARFIndex.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -namespace lldb_private { +namespace lldb_plugin::dwarf { class AppleDWARFIndex : public DWARFIndex { public: static std::unique_ptr - Create(Module &module, DWARFDataExtractor apple_names, - DWARFDataExtractor apple_namespaces, DWARFDataExtractor apple_types, - DWARFDataExtractor apple_objc, DWARFDataExtractor debug_str); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor apple_names, + lldb_private::DWARFDataExtractor apple_namespaces, + lldb_private::DWARFDataExtractor apple_types, + lldb_private::DWARFDataExtractor apple_objc, + lldb_private::DWARFDataExtractor debug_str); - AppleDWARFIndex(Module &module, + AppleDWARFIndex(lldb_private::Module &module, std::unique_ptr apple_names, std::unique_ptr apple_namespaces, std::unique_ptr apple_types, @@ -33,33 +36,33 @@ class AppleDWARFIndex : public DWARFIndex { void Preload() override {} void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression &regex, + GetGlobalVariables(const lldb_private::RegularExpression &regex, llvm::function_ref callback) override; void
GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression &regex, + void GetFunctions(const lldb_private::RegularExpression &regex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: std::unique_ptr m_apple_names_up; @@ -77,6 +80,6 @@ class AppleDWARFIndex : public DWARFIndex { std::optional search_for_tag = std::nullopt, std::optional search_for_qualhash = std::nullopt); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_APPLEDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp index 88a5e6027557b9b..d4c6b043fec359c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp @@ -14,6 +14,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, StringRef Style) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index b5a5cfe263f7804..939336dab2be6e6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -14,6 +14,7 @@ #include #include +namespace lldb_plugin::dwarf { /// Identifies a DWARF debug info entry within a given Module.
It contains three /// "coordinates": /// - file_index: identifies the separate stand alone debug info file @@ -131,10 +132,12 @@ class DIERef { static_assert(sizeof(DIERef) == 8); typedef std::vector DIEArray; +} // namespace lldb_plugin::dwarf namespace llvm { -template<> struct format_provider { - static void format(const DIERef &ref, raw_ostream &OS, StringRef Style); +template <> struct format_provider { + static void format(const lldb_plugin::dwarf::DIERef &ref, raw_ostream &OS, + StringRef Style); }; } // namespace llvm diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index a68b7cd110eb719..06be740ad398d7d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -18,6 +18,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; std::optional DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 18825ae060b12fe..cf9f4bf8cdf30e4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -17,11 +17,13 @@ #include "lldb/lldb-enumerations.h" #include -class DWARFDIE; namespace lldb_private { class CompileUnit; class ExecutionContext; } + +namespace lldb_plugin::dwarf { +class DWARFDIE; class SymbolFileDWARF; class DWARFASTParser { @@ -65,5 +67,6 @@ class DWARFASTParser { static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..817ae92e962b3d6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -60,6 +60,8 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; + DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast) : m_ast(ast), m_die_to_decl_ctx(), m_decl_ctx_to_die() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 88bfc490e890744..8a8c8f48af16f97 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -31,12 +31,14 @@ namespace lldb_private { class CompileUnit; } +namespace lldb_plugin::dwarf { class DWARFDebugInfoEntry; class SymbolFileDWARF; +} // namespace lldb_plugin::dwarf struct ParsedDWARFTypeAttributes; -class DWARFASTParserClang : public DWARFASTParser { +class DWARFASTParserClang : public lldb_plugin::dwarf::DWARFASTParser { public: DWARFASTParserClang(lldb_private::TypeSystemClang &ast); @@ -44,32 +46,33 @@ class DWARFASTParserClang : public DWARFASTParser { // DWARFASTParser interface. 
lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, bool *type_is_new_ptr) override; - lldb_private::ConstString - ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; + lldb_private::ConstString ConstructDemangledNameFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + CompleteTypeFromDWARF(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; lldb_private::CompilerDecl - GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + GetDeclForUIDFromDWARF(const lldb_plugin::dwarf::DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext - GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -85,9 +88,9 @@ class DWARFASTParserClang : public DWARFASTParser { /// DWARFFormValue with the bit width of the given integer type. /// Returns an error if the value in the DWARFFormValue does not fit /// into the given integer type or the integer type isn't supported. - llvm::Expected - ExtractIntFromFormValue(const lldb_private::CompilerType &int_type, - const DWARFFormValue &form_value) const; + llvm::Expected ExtractIntFromFormValue( + const lldb_private::CompilerType &int_type, + const lldb_plugin::dwarf::DWARFFormValue &form_value) const; /// Returns the template parameters of a class DWARFDIE as a string. /// @@ -100,7 +103,7 @@ class DWARFASTParserClang : public DWARFASTParser { /// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. lldb_private::ConstString - GetDIEClassTemplateParams(const DWARFDIE &die) override; + GetDIEClassTemplateParams(const lldb_plugin::dwarf::DWARFDIE &die) override; protected: /// Protected typedefs and members. 
@@ -108,14 +111,17 @@ class DWARFASTParserClang : public DWARFASTParser { class DelayedAddObjCClassProperty; typedef std::vector DelayedPropertyList; - typedef llvm::DenseMap + typedef llvm::DenseMap DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; - typedef llvm::DenseMap DIEToModuleMap; - typedef llvm::DenseMap + typedef llvm::DenseMap DIEToDeclMap; lldb_private::TypeSystemClang &m_ast; @@ -126,11 +132,13 @@ class DWARFASTParserClang : public DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); + clang::DeclContext * + GetDeclContextForBlock(const lldb_plugin::dwarf::DWARFDIE &die); - clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); + clang::BlockDecl *ResolveBlockDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveNamespaceDIE(const lldb_plugin::dwarf::DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -141,31 +149,34 @@ class DWARFASTParserClang : public DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. - clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveImportedDeclarationDIE(const lldb_plugin::dwarf::DWARFDIE &die); - bool ParseTemplateDIE(const DWARFDIE &die, + bool ParseTemplateDIE(const lldb_plugin::dwarf::DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const DWARFDIE &parent_die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); + std::string + GetCPlusPlusQualifiedName(const lldb_plugin::dwarf::DWARFDIE &die); bool ParseChildMembers( - const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, + const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, + std::vector &member_function_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const DWARFDIE &parent_die, bool skip_artificial, - bool &is_static, bool &is_variadic, + const lldb_plugin::dwarf::DWARFDIE &parent_die, + bool skip_artificial, bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, @@ -173,33 +184,40 @@ class DWARFASTParserClang : public DWARFASTParser { size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, bool is_signed, uint32_t enumerator_byte_size, - const DWARFDIE &parent_die); + const lldb_plugin::dwarf::DWARFDIE &parent_die); /// Parse a structure, class, or union type DIE. 
lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb_private::Type *GetTypeForDIE(const DWARFDIE &die); + lldb_private::Type *GetTypeForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); + clang::Decl *GetClangDeclForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, - DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextContainingDIE(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_plugin::dwarf::DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID + GetOwningClangModule(const lldb_plugin::dwarf::DWARFDIE &die); - bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, - const DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes( + const lldb_plugin::dwarf::DWARFDIE &src_class_die, + const lldb_plugin::dwarf::DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetCachedClangDeclContextForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, + const lldb_plugin::dwarf::DWARFDIE &die); - void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); + void LinkDeclToDIE(clang::Decl *decl, + const lldb_plugin::dwarf::DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. @@ -207,16 +225,17 @@ class DWARFASTParserClang : public DWARFASTParser { /// Returns \p type_sp. lldb::TypeSP UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); + const lldb_plugin::dwarf::DWARFDIE &die, + lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + lldb::ModuleSP GetModuleForType(const lldb_plugin::dwarf::DWARFDIE &die); private: struct FieldInfo { @@ -268,32 +287,37 @@ class DWARFASTParserClang : public DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. 
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, + void ParseObjCProperty(const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, DelayedPropertyList &delayed_properties); void - ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, + ParseSingleMember(const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, FieldInfo &last_field_info); - bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteRecordType(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteEnumType(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const DWARFDIE &die, + lldb::TypeSP ParseArrayType(const lldb_plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, + lldb::TypeSP ParsePointerToMemberType(const lldb_plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. @@ -311,7 +335,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. 
void ParseInheritance( - const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -328,7 +353,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, + ParseRustVariantPart(lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -338,7 +364,7 @@ class DWARFASTParserClang : public DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + explicit ParsedDWARFTypeAttributes(const lldb_plugin::dwarf::DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -355,12 +381,12 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; lldb_private::ConstString name; lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; + lldb_plugin::dwarf::DWARFDIE object_pointer; + lldb_plugin::dwarf::DWARFFormValue abstract_origin; + lldb_plugin::dwarf::DWARFFormValue containing_type; + lldb_plugin::dwarf::DWARFFormValue signature; + lldb_plugin::dwarf::DWARFFormValue specification; + lldb_plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional byte_size; size_t calling_convention = llvm::dwarf::DW_CC_normal; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp index 00b56537ae2b5fe..48bd62012afb393 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp @@ -11,6 +11,7 @@ #include "DWARFDebugInfo.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; DWARFAttributes::DWARFAttributes() : m_infos() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index 90e12fa024936d6..d0f152608dde415 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; class DWARFAttribute { @@ -31,6 +32,7 @@ class DWARFAttribute { form = m_form; val = m_value; } + protected: dw_attr_t m_attr; dw_form_t m_form; @@ -72,5 +74,6 @@ class DWARFAttributes { typedef llvm::SmallVector collection; collection m_infos; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp index 37a917c3a7661eb..9ab7f0651d93d1f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp @@ 
-18,6 +18,7 @@ #include using namespace lldb_private; +using namespace lldb_plugin::dwarf; std::optional DWARFBaseDIE::GetDIERef() const { if (!IsValid()) @@ -35,7 +36,7 @@ dw_tag_t DWARFBaseDIE::Tag() const { } const char *DWARFBaseDIE::GetTagAsCString() const { - return lldb_private::DW_TAG_value_to_name(Tag()); + return DW_TAG_value_to_name(Tag()); } const char *DWARFBaseDIE::GetAttributeValueAsString(const dw_attr_t attr, @@ -120,6 +121,7 @@ DWARFAttributes DWARFBaseDIE::GetAttributes(Recurse recurse) const { return DWARFAttributes(); } +namespace lldb_plugin::dwarf { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return lhs.GetDIE() == rhs.GetDIE() && lhs.GetCU() == rhs.GetCU(); } @@ -127,6 +129,7 @@ bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return !(lhs == rhs); } +} // namespace lldb_plugin::dwarf const DWARFDataExtractor &DWARFBaseDIE::GetData() const { // Clients must check if this DIE is valid before calling this function. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h index 8bcf807ad163a60..f7948ae13716e91 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h @@ -15,6 +15,7 @@ #include "llvm/Support/Error.h" #include +namespace lldb_plugin::dwarf { class DIERef; class DWARFASTParser; class DWARFAttributes; @@ -124,5 +125,6 @@ class DWARFBaseDIE { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFBASEDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp index f839a59bf6c390b..6f18c3ddf605f1d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp @@ -16,6 +16,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void DWARFCompileUnit::Dump(Stream *s) const { s->Format( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index 65debac4c7d9265..574ede2acb5ceab 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -14,8 +14,9 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_plugin::dwarf { class DWARFCompileUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override; @@ -40,5 +41,6 @@ class DWARFCompileUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFCOMPILEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index f72dad88e157592..d7979a43e46e8a2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; static DWARFDataExtractor LoadSection(SectionList *section_list, SectionType section_type) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h index 7df776b5f514155..cabbb0bffbf6616 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h @@ -16,16 +16,16 @@ #include #include -namespace lldb_private { +namespace lldb_plugin::dwarf { class DWARFContext { private: - SectionList *m_main_section_list; - SectionList *m_dwo_section_list; + lldb_private::SectionList *m_main_section_list; + lldb_private::SectionList *m_dwo_section_list; mutable std::unique_ptr m_llvm_context; struct SectionData { llvm::once_flag flag; - DWARFDataExtractor data; + lldb_private::DWARFDataExtractor data; }; SectionData m_data_debug_abbrev; @@ -45,39 +45,39 @@ class DWARFContext { SectionData m_data_debug_tu_index; SectionData m_data_debug_types; - const DWARFDataExtractor & + const lldb_private::DWARFDataExtractor & LoadOrGetSection(std::optional main_section_type, std::optional dwo_section_type, SectionData &data); - const DWARFDataExtractor &getOrLoadCuIndexData(); - const DWARFDataExtractor &getOrLoadTuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadCuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadTuIndexData(); public: - explicit DWARFContext(SectionList *main_section_list, - SectionList *dwo_section_list) + explicit DWARFContext(lldb_private::SectionList *main_section_list, + lldb_private::SectionList *dwo_section_list) : m_main_section_list(main_section_list), m_dwo_section_list(dwo_section_list) {} - const DWARFDataExtractor &getOrLoadAbbrevData(); - const DWARFDataExtractor &getOrLoadAddrData(); - const DWARFDataExtractor &getOrLoadArangesData(); - const DWARFDataExtractor &getOrLoadDebugInfoData(); - const DWARFDataExtractor &getOrLoadLineData(); - const DWARFDataExtractor &getOrLoadLineStrData(); - const DWARFDataExtractor &getOrLoadLocData(); - const DWARFDataExtractor &getOrLoadLocListsData(); - const DWARFDataExtractor &getOrLoadMacroData(); - const DWARFDataExtractor &getOrLoadRangesData(); - const DWARFDataExtractor &getOrLoadRngListsData(); - const DWARFDataExtractor &getOrLoadStrData(); - const DWARFDataExtractor &getOrLoadStrOffsetsData(); - const DWARFDataExtractor &getOrLoadDebugTypesData(); + const lldb_private::DWARFDataExtractor &getOrLoadAbbrevData(); + const lldb_private::DWARFDataExtractor &getOrLoadAddrData(); + const lldb_private::DWARFDataExtractor &getOrLoadArangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugInfoData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadMacroData(); + const lldb_private::DWARFDataExtractor &getOrLoadRangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadRngListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrOffsetsData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugTypesData(); bool isDwo() { return m_dwo_section_list != nullptr; } llvm::DWARFContext &GetAsLLVM(); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index b31c5dcac91851d..62ef0eb356b0bc7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; namespace { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 031ea26ad405094..3e4a9ff4d446638 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/iterator_range.h" +namespace lldb_plugin::dwarf { class DWARFDIE : public DWARFBaseDIE { public: class child_iterator; @@ -126,5 +127,6 @@ class DWARFDIE::child_iterator return *this; } }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h index b9526b079c1e9da..41b8e9ad0217b69 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h @@ -33,6 +33,6 @@ class DWARFDataExtractor : public DataExtractor { llvm::DWARFDataExtractor GetAsLLVMDWARF() const; llvm::DataExtractor GetAsLLVM() const; }; -} +} // namespace lldb_private #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDATAEXTRACTOR_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp index 03cbfd28ae7413a..61f1f5f1aa8baec 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -13,6 +13,7 @@ #include using namespace lldb_private; +using namespace lldb_plugin::dwarf; DWARFDebugArangeSet::DWARFDebugArangeSet() : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h index 3c8633eaa3cce85..b1aaade00fbb126 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFDebugArangeSet { public: struct Header { @@ -62,5 +63,6 @@ class DWARFDebugArangeSet { Header m_header; DescriptorColl m_arange_descriptors; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index b38dd2b88c9d0b0..a5beafe593c2c1e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // Constructor DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h index 5ff37e400c88403..e0a852faa40b80c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h @@ -13,6 +13,7 @@ #include "lldb/Utility/RangeMap.h" #include "llvm/Support/Error.h" +namespace lldb_plugin::dwarf { class DWARFDebugAranges { protected: typedef lldb_private::RangeDataVector @@ -50,5 +51,6 @@ class DWARFDebugAranges { 
protected: RangeToDIE m_aranges; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp index 9a33d6338b87d3e..54609a1b77fc6cb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp @@ -27,10 +27,10 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // Constructor -DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context) +DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context) : m_dwarf(dwarf), m_context(context), m_units(), m_cu_aranges_up() {} const DWARFDebugAranges &DWARFDebugInfo::GetCompileUnitAranges() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index c990ac9fbe58310..42ab0b55ba41d28 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -19,20 +19,17 @@ #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" -namespace lldb_private { +namespace lldb_plugin::dwarf { class DWARFContext; -} class DWARFDebugInfo { public: - typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, - DWARFUnit *cu, + typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, DWARFUnit *cu, DWARFDebugInfoEntry *die, const dw_offset_t next_offset, const uint32_t depth, void *userData); - explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context); + explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context); size_t GetNumUnits(); DWARFUnit *GetUnitAtIndex(size_t idx); @@ -58,7 +55,7 @@ class DWARFDebugInfo { typedef std::vector UnitColl; SymbolFileDWARF &m_dwarf; - lldb_private::DWARFContext &m_context; + DWARFContext &m_context; llvm::once_flag m_units_once_flag; UnitColl m_units; @@ -80,5 +77,6 @@ class DWARFDebugInfo { DWARFDebugInfo(const DWARFDebugInfo &) = delete; const DWARFDebugInfo &operator=(const DWARFDebugInfo &) = delete; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index a6ab83700904cb9..6405ecc20542f41 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -35,6 +35,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; extern int g_verbose; // Extract a debug info entry for a given DWARFUnit from the data diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index 29db44a16bb1281..cfcf91a582e44e9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -22,6 +22,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +namespace lldb_plugin::dwarf { class DWARFDeclContext; #define DIE_SIBLING_IDX_BITSIZE 31 @@ -190,5 +191,6 @@ class DWARFDebugInfoEntry { void GetAttributes(DWARFUnit *cu, DWARFAttributes &attrs, Recurse recurse, uint32_t curr_depth) const; }; +} // namespace lldb_plugin::dwarf #endif // 
LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFOENTRY_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp index 19c6448c4e74a06..cde5730d51e33fa 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp @@ -15,6 +15,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; DWARFDebugMacroHeader DWARFDebugMacroHeader::ParseHeader(const DWARFDataExtractor &debug_macro_data, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h index cbf762458331bcd..fd506b10b26b3da 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h @@ -17,11 +17,10 @@ #include "lldb/lldb-types.h" namespace lldb_private { - class DWARFDataExtractor; +} -} // namespace lldb_private - +namespace lldb_plugin::dwarf { class SymbolFileDWARF; class DWARFDebugMacroHeader { @@ -57,5 +56,6 @@ class DWARFDebugMacroEntry { SymbolFileDWARF *sym_file_dwarf, lldb_private::DebugMacrosSP &debug_macros_sp); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGMACRO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp index 0b5bb23a4981f89..e37d22f07337763 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp @@ -11,6 +11,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" using namespace lldb_private; +using namespace lldb_plugin::dwarf; DWARFDebugRanges::DWARFDebugRanges() : m_range_map() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h index 2e06cd5daf6f32d..78724a1d19223e6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h @@ -12,21 +12,21 @@ #include "lldb/Core/dwarf.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; -namespace lldb_private { class DWARFContext; -} class DWARFDebugRanges { public: DWARFDebugRanges(); - void Extract(lldb_private::DWARFContext &context); + void Extract(DWARFContext &context); DWARFRangeList FindRanges(const DWARFUnit *cu, dw_offset_t debug_ranges_offset) const; protected: std::map m_range_map; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGRANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp index 393de0038e651f3..a20dfef6bf89921 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp @@ -9,6 +9,7 @@ #include "DWARFDeclContext.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; const char *DWARFDeclContext::GetQualifiedName() const { if (m_qualified_name.empty()) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h index 13e3dfb70c0cc80..03ed4e898d566fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h @@ -16,6 +16,7 @@ #include #include +namespace lldb_plugin::dwarf { // DWARFDeclContext // // 
A class that represents a declaration context all the way down to a @@ -82,5 +83,6 @@ class DWARFDeclContext { collection m_entries; mutable std::string m_qualified_name; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDECLCONTEXT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp index 4e99a295ce50f7d..e30253e18d49cdf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp @@ -12,7 +12,7 @@ #include #include -namespace lldb_private { +namespace lldb_plugin::dwarf { const char *DW_TAG_value_to_name(uint32_t val) { static char invalid[100]; @@ -88,4 +88,4 @@ const char *DW_LNS_value_to_name(uint32_t val) { return llvmstr.data(); } -} // namespace lldb_private +} // namespace lldb_plugin::dwarf diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h index 2afdbb47381a9cd..dd41724149f9d90 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h @@ -12,7 +12,7 @@ #include "lldb/Core/dwarf.h" #include -namespace lldb_private { +namespace lldb_plugin::dwarf { typedef uint32_t DRC_class; // Holds DRC_* class bitfields @@ -30,6 +30,6 @@ const char *DW_LANG_value_to_name(uint32_t val); const char *DW_LNS_value_to_name(uint32_t val); -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEFINES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index 6ca17dcf47ff7ba..7c9f8073a3a01ae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -22,6 +22,7 @@ class DWARFUnit; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index 2a8843c1a0d45df..532afd94ef72c5f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFUnit; class SymbolFileDWARF; class DWARFDIE; @@ -84,7 +85,8 @@ class DWARFFormValue { // It may be different from compile unit where m_value refers to. 
const DWARFUnit *m_unit = nullptr; // Unit for this form dw_form_t m_form = dw_form_t(0); // Form for this value - ValueType m_value; // Contains all data for the form + ValueType m_value; // Contains all data for the form }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index 779b52481b856d8..791bd2d88696c26 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -17,6 +17,7 @@ using namespace lldb_private; using namespace lldb; +using namespace lldb_plugin::dwarf; DWARFIndex::~DWARFIndex() = default; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 13fe96dae2aa1d6..4fd952ddd8fc148 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -17,13 +17,13 @@ #include "lldb/Core/Module.h" #include "lldb/Target/Statistics.h" +namespace lldb_plugin::dwarf { class DWARFDeclContext; class DWARFDIE; -namespace lldb_private { class DWARFIndex { public: - DWARFIndex(Module &module) : m_module(module) {} + DWARFIndex(lldb_private::Module &module) : m_module(module) {} virtual ~DWARFIndex(); virtual void Preload() = 0; @@ -32,53 +32,56 @@ class DWARFIndex { /// (e.g., to only retrieve variables from a given context) should be done by /// the consumer. virtual void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) = 0; virtual void - GetGlobalVariables(const RegularExpression &regex, + GetGlobalVariables(const lldb_private::RegularExpression &regex, llvm::function_ref callback) = 0; /// \a cu must be the skeleton unit if possible, not GetNonSkeletonUnit().
virtual void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) = 0; virtual void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) = 0; virtual void - GetCompleteObjCClass(ConstString class_name, bool must_be_implementation, + GetCompleteObjCClass(lldb_private::ConstString class_name, + bool must_be_implementation, llvm::function_ref callback) = 0; - virtual void GetTypes(ConstString name, + virtual void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) = 0; virtual void - GetNamespaces(ConstString name, + GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void - GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, + SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) = 0; virtual void - GetFunctions(const RegularExpression &regex, + GetFunctions(const lldb_private::RegularExpression &regex, llvm::function_ref callback) = 0; - virtual void Dump(Stream &s) = 0; + virtual void Dump(lldb_private::Stream &s) = 0; - StatsDuration::Duration GetIndexTime() { return m_index_time; } + lldb_private::StatsDuration::Duration GetIndexTime() { return m_index_time; } protected: - Module &m_module; - StatsDuration m_index_time; + lldb_private::Module &m_module; + lldb_private::StatsDuration m_index_time; /// Helper function implementing common logic for processing function dies. If /// the function given by "ref" matches search criteria given by /// "parent_decl_ctx" and "name_type_mask", it is inserted into the "dies" /// vector.
- bool ProcessFunctionDIE(const Module::LookupInfo &lookup_info, DIERef ref, - SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, - llvm::function_ref callback); + bool + ProcessFunctionDIE(const lldb_private::Module::LookupInfo &lookup_info, + DIERef ref, SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback); class DIERefCallbackImpl { public: @@ -102,6 +105,6 @@ class DWARFIndex { void ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const; }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp index 87af7177ca95ee9..cdacd8425bb8ed4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void DWARFTypeUnit::Dump(Stream *s) const { s->Format("{0:x16}: Type Unit: length = {1:x8}, version = {2:x4}, " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 5d939582a312e98..5a85a9dec002fa1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -14,8 +14,9 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_plugin::dwarf { class DWARFTypeUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {} @@ -37,5 +38,6 @@ class DWARFTypeUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFTYPEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index b51cf04c7b724e5..cf6402ecb77a78d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -28,6 +28,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; extern int g_verbose; @@ -201,8 +202,8 @@ DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs) rhs.m_cu = nullptr; } -DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=( - DWARFUnit::ScopedExtractDIEs &&rhs) { +DWARFUnit::ScopedExtractDIEs & +DWARFUnit::ScopedExtractDIEs::operator=(DWARFUnit::ScopedExtractDIEs &&rhs) { m_cu = rhs.m_cu; rhs.m_cu = nullptr; m_clear_dies = rhs.m_clear_dies; @@ -311,9 +312,9 @@ void DWARFUnit::ExtractDIEsRWLocked() { } if (!m_die_array.empty()) { - // The last die cannot have children (if it did, it wouldn't be the last one). - // This only makes a difference for malformed dwarf that does not have a - // terminating null die. + // The last die cannot have children (if it did, it wouldn't be the last + // one). This only makes a difference for malformed dwarf that does not have + // a terminating null die. 
m_die_array.back().SetHasChildren(false); if (m_first_die) { @@ -720,7 +721,7 @@ void DWARFUnit::ParseProducerInfo() { llvm::SmallVector matches; if (g_swiftlang_version_regex.Execute(producer, &matches)) { - m_producer_version.tryParse(matches[1]); + m_producer_version.tryParse(matches[1]); m_producer = eProducerSwift; } else if (producer.contains("clang")) { if (g_clang_version_regex.Execute(producer, &matches)) @@ -1086,22 +1087,20 @@ DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) { return ranges; } -llvm::Expected -DWARFUnit::FindRnglistFromIndex(uint32_t index) { +llvm::Expected DWARFUnit::FindRnglistFromIndex(uint32_t index) { llvm::Expected maybe_offset = GetRnglistOffset(index); if (!maybe_offset) return maybe_offset.takeError(); return FindRnglistFromOffset(*maybe_offset); } - bool DWARFUnit::HasAny(llvm::ArrayRef tags) { ExtractUnitDIEIfNeeded(); if (m_dwo) return m_dwo->HasAny(tags); - for (const auto &die: m_die_array) { - for (const auto tag: tags) { + for (const auto &die : m_die_array) { + for (const auto tag : tags) { if (tag == die.Tag()) return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 20871d805e77a87..ec8a6241660f6f1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -19,6 +19,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFUnit; class DWARFCompileUnit; class NameToDIE; @@ -80,8 +81,7 @@ class DWARFUnitHeader { static llvm::Expected extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &dwarf_context, - lldb::offset_t *offset_ptr); + DWARFContext &dwarf_context, lldb::offset_t *offset_ptr); }; class DWARFUnit : public lldb_private::UserID { @@ -104,6 +104,7 @@ class DWARFUnit : public lldb_private::UserID { class ScopedExtractDIEs { DWARFUnit *m_cu; + public: bool m_clear_dies = false; ScopedExtractDIEs(DWARFUnit &cu); @@ -227,7 +228,9 @@ class DWARFUnit : public lldb_private::UserID { uint8_t GetUnitType() const { return m_header.GetUnitType(); } bool IsTypeUnit() const { return m_header.IsTypeUnit(); } /// Note that this check only works for DWARF5+. - bool IsSkeletonUnit() const { return GetUnitType() == llvm::dwarf::DW_UT_skeleton; } + bool IsSkeletonUnit() const { + return GetUnitType() == llvm::dwarf::DW_UT_skeleton; + } std::optional GetStringOffsetSectionItem(uint32_t index) const; @@ -272,7 +275,6 @@ class DWARFUnit : public lldb_private::UserID { /// True if any DIEs match any tag in \a tags, false otherwise. bool HasAny(llvm::ArrayRef tags); - /// Get the fission .dwo file specific error for this compile unit. /// /// The skeleton compile unit only can have a DWO error. Any other type @@ -351,9 +353,9 @@ class DWARFUnit : public lldb_private::UserID { lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate; std::optional m_comp_dir; std::optional m_file_spec; - std::optional m_addr_base; ///< Value of DW_AT_addr_base. - dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. - dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. + std::optional m_addr_base; ///< Value of DW_AT_addr_base. + dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. + dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. 
std::optional m_gnu_addr_base; std::optional m_gnu_ranges_base; @@ -390,5 +392,6 @@ DWARFUnit(const DWARFUnit &) = delete; const DWARFUnit &operator=(const DWARFUnit &) = delete; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index af2d6c554140bc0..748267e3c8bd010 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; llvm::Expected> DebugNamesDWARFIndex::Create(Module &module, DWARFDataExtractor debug_names, @@ -227,7 +228,7 @@ void DebugNamesDWARFIndex::GetNamespaces( ConstString name, llvm::function_ref callback) { for (const DebugNames::Entry &entry : m_debug_names_up->equal_range(name.GetStringRef())) { - dwarf::Tag entry_tag = entry.tag(); + lldb_private::dwarf::Tag entry_tag = entry.tag(); if (entry_tag == DW_TAG_namespace || entry_tag == DW_TAG_imported_declaration) { if (!ProcessEntry(entry, callback)) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index abbd700f1603feb..938b3e50ec0fba5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -17,50 +17,51 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include -namespace lldb_private { +namespace lldb_plugin::dwarf { class DebugNamesDWARFIndex : public DWARFIndex { public: static llvm::Expected> - Create(Module &module, DWARFDataExtractor debug_names, - DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor debug_names, + lldb_private::DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); void Preload() override { m_fallback.Preload(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression &regex, + GetGlobalVariables(const lldb_private::RegularExpression &regex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override {} void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression &regex, +
void GetFunctions(const lldb_private::RegularExpression &regex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: - DebugNamesDWARFIndex(Module &module, + DebugNamesDWARFIndex(lldb_private::Module &module, std::unique_ptr debug_names_up, - DWARFDataExtractor debug_names_data, - DWARFDataExtractor debug_str_data, + lldb_private::DWARFDataExtractor debug_names_data, + lldb_private::DWARFDataExtractor debug_str_data, SymbolFileDWARF &dwarf) : DWARFIndex(module), m_debug_info(dwarf.DebugInfo()), m_debug_names_data(debug_names_data), m_debug_str_data(debug_str_data), @@ -71,8 +72,8 @@ class DebugNamesDWARFIndex : public DWARFIndex { // LLVM DWARFDebugNames will hold a non-owning reference to this data, so keep // track of the ownership here. - DWARFDataExtractor m_debug_names_data; - DWARFDataExtractor m_debug_str_data; + lldb_private::DWARFDataExtractor m_debug_names_data; + lldb_private::DWARFDataExtractor m_debug_str_data; using DebugNames = llvm::DWARFDebugNames; std::unique_ptr m_debug_names_up; @@ -89,6 +90,6 @@ class DebugNamesDWARFIndex : public DWARFIndex { static llvm::DenseSet GetUnits(const DebugNames &debug_names); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DEBUGNAMESDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 90f18c96afa230e..d4a4cbcc0f5326f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -28,6 +28,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; void ManualDWARFIndex::Index() { if (m_indexed) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index d95cf501face8e4..cd5527459b86d22 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -13,13 +13,13 @@ #include "Plugins/SymbolFile/DWARF/NameToDIE.h" #include "llvm/ADT/DenseSet.h" +namespace lldb_plugin::dwarf { class DWARFDebugInfo; class SymbolFileDWARFDwo; -namespace lldb_private { class ManualDWARFIndex : public DWARFIndex { public: - ManualDWARFIndex(Module &module, SymbolFileDWARF &dwarf, + ManualDWARFIndex(lldb_private::Module &module, SymbolFileDWARF &dwarf, llvm::DenseSet units_to_avoid = {}) : DWARFIndex(module), m_dwarf(&dwarf), m_units_to_avoid(std::move(units_to_avoid)) {} @@ -27,33 +27,33 @@ class ManualDWARFIndex : public DWARFIndex { void Preload() override { Index(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression &regex, + GetGlobalVariables(const lldb_private::RegularExpression &regex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &unit, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString
name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression &regex, + void GetFunctions(const lldb_private::RegularExpression &regex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; // Make IndexSet public so we can unit test the encoding and decoding logic. struct IndexSet { @@ -65,8 +65,9 @@ class ManualDWARFIndex : public DWARFIndex { NameToDIE globals; NameToDIE types; NameToDIE namespaces; - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); - void Encode(DataEncoder &encoder) const; + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr); + void Encode(lldb_private::DataEncoder &encoder) const; bool operator==(const IndexSet &rhs) const { return function_basenames == rhs.function_basenames && function_fullnames == rhs.function_fullnames && @@ -94,8 +95,8 @@ class ManualDWARFIndex : public DWARFIndex { /// All strings in cache files are put into string tables for efficiency /// and cache file size reduction. Strings are stored as uint32_t string /// table offsets in the cache data. - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, - bool &signature_mismatch); + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr, bool &signature_mismatch); /// Encode this object into a data encoder object. /// @@ -112,7 +113,7 @@ class ManualDWARFIndex : public DWARFIndex { /// \return /// True if the symbol table's object file can generate a valid signature /// and all data for the symbol table was encoded, false otherwise. - bool Encode(DataEncoder &encoder) const; + bool Encode(lldb_private::DataEncoder &encoder) const; /// Get the cache key string for this symbol table.
/// @@ -173,6 +174,6 @@ class ManualDWARFIndex : public DWARFIndex { IndexSet m_set; bool m_indexed = false; }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 89e628f5eaf1c47..555c9ca2275a007 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -20,6 +20,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void NameToDIE::Finalize() { m_map.Sort(std::less()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 61df1a628ab5913..4617abc4fca30ef 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -16,6 +16,7 @@ #include "lldb/Core/dwarf.h" #include "lldb/lldb-defines.h" +namespace lldb_plugin::dwarf { class DWARFUnit; class NameToDIE { @@ -45,8 +46,7 @@ class NameToDIE { void ForEach(std::function const - &callback) const; + const DIERef &die_ref)> const &callback) const; /// Decode a serialized version of this object from data. /// @@ -89,5 +89,6 @@ class NameToDIE { protected: lldb_private::UniqueCStringMap m_map; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e472074545a6f07..c6574881e0576c2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -97,6 +97,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; LLDB_PLUGIN_DEFINE(SymbolFileDWARF) @@ -136,9 +137,8 @@ static PluginProperties &GetGlobalPluginProperties() { } static const llvm::DWARFDebugLine::LineTable * -ParseLLVMLineTable(lldb_private::DWARFContext &context, - llvm::DWARFDebugLine &line, dw_offset_t line_offset, - dw_offset_t unit_offset) { +ParseLLVMLineTable(DWARFContext &context, llvm::DWARFDebugLine &line, + dw_offset_t line_offset, dw_offset_t unit_offset) { Log *log = GetLog(DWARFLog::DebugInfo); llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVMDWARF(); @@ -159,7 +159,7 @@ ParseLLVMLineTable(lldb_private::DWARFContext &context, return *line_table; } -static bool ParseLLVMLineTablePrologue(lldb_private::DWARFContext &context, +static bool ParseLLVMLineTablePrologue(DWARFContext &context, llvm::DWARFDebugLine::Prologue &prologue, dw_offset_t line_offset, dw_offset_t unit_offset) { @@ -2428,7 +2428,7 @@ bool SymbolFileDWARF::DIEInDeclContext(const CompilerDeclContext &decl_ctx, // ...But if we are only checking root decl contexts, confirm that the // 'die' is a top-level context. 
if (only_root_namespaces) - return die.GetParent().Tag() == dwarf::DW_TAG_compile_unit; + return die.GetParent().Tag() == llvm::dwarf::DW_TAG_compile_unit; return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 5aaf8bd270ef7b1..f0cf57b776bb105 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -38,6 +38,9 @@ #include "DWARFIndex.h" #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace lldb_plugin::dwarf { // Forward Declarations for this DWARF plugin class DebugMapModule; class DWARFCompileUnit; @@ -78,7 +81,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; // Static Functions static void Initialize(); @@ -285,7 +288,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { void DumpClangAST(lldb_private::Stream &s) override; - lldb_private::DWARFContext &GetDWARFContext() { return m_context; } + DWARFContext &GetDWARFContext() { return m_context; } const std::shared_ptr &GetDwpSymbolFile(); @@ -534,7 +537,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { llvm::once_flag m_dwp_symfile_once_flag; std::shared_ptr m_dwp_symfile; - lldb_private::DWARFContext m_context; + DWARFContext m_context; llvm::once_flag m_info_once_flag; std::unique_ptr m_info; @@ -547,7 +550,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { DebugMacrosMap m_debug_macros_map; ExternalTypeModuleMap m_external_type_modules; - std::unique_ptr m_index; + std::unique_ptr m_index; bool m_fetched_external_modules : 1; lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; @@ -578,5 +581,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// an index that identifies the .DWO or .o file. 
std::optional m_file_index; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARF_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index eadedd32e1a4aaf..afd3a9a5c27b08c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -42,6 +42,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; char SymbolFileDWARFDebugMap::ID; @@ -166,6 +167,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap( return file_range_map; } +namespace lldb_plugin::dwarf { class DebugMapModule : public Module { public: DebugMapModule(const ModuleSP &exe_module_sp, uint32_t cu_idx, @@ -222,6 +224,7 @@ class DebugMapModule : public Module { ModuleWP m_exe_module_wp; const uint32_t m_cu_idx; }; +} // namespace lldb_plugin::dwarf void SymbolFileDWARFDebugMap::Initialize() { PluginManager::RegisterPlugin(GetPluginNameStatic(), diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index 881fd4c45ff05a0..d9ad4f0ac077c29 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -20,6 +20,9 @@ #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace lldb_plugin::dwarf { class SymbolFileDWARF; class DWARFCompileUnit; class DWARFDebugAranges; @@ -161,7 +164,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { enum { kHaveInitializedOSOs = (1 << 0), kNumFlags }; friend class DebugMapModule; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; friend class DWARFCompileUnit; friend class SymbolFileDWARF; struct OSOInfo { @@ -296,9 +299,10 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { bool Supports_DW_AT_APPLE_objc_complete_type(SymbolFileDWARF *skip_dwarf_oso); - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation); + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation); UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; @@ -403,5 +407,6 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { size_t AddOSOARanges(SymbolFileDWARF *dwarf2Data, DWARFDebugAranges *debug_aranges); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDEBUGMAP_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 78c3c19684e116d..3d92a9c384fb491 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -21,6 +21,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; char SymbolFileDWARFDwo::ID; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index e98ea49d939baf0..22aba785670e228 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -12,6 +12,7 @@ #include 
"SymbolFileDWARF.h" #include +namespace lldb_plugin::dwarf { class SymbolFileDWARFDwo : public SymbolFileDWARF { /// LLVM RTTI support. static char ID; @@ -65,9 +66,10 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { lldb::TypeSP FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) override; - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation) override; + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation) override; SymbolFileDWARF &GetBaseSymbolFile() const { return m_base_symbol_file; } @@ -77,5 +79,6 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { SymbolFileDWARF &m_base_symbol_file; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDWO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 22a921cf61389bc..7378b2502dc9358 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -11,6 +11,7 @@ #include "lldb/Core/Declaration.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, const lldb_private::Declaration &decl, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index 0947d1e581c5237..6f8ef5d9049e3e9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ -16,6 +16,7 @@ #include "DWARFDIE.h" #include "lldb/Core/Declaration.h" +namespace lldb_plugin::dwarf { class UniqueDWARFASTType { public: // Constructors and Destructors @@ -74,8 +75,7 @@ class UniqueDWARFASTTypeMap { ~UniqueDWARFASTTypeMap() = default; - void Insert(lldb_private::ConstString name, - const UniqueDWARFASTType &entry) { + void Insert(lldb_private::ConstString name, const UniqueDWARFASTType &entry) { m_collection[name.GetCString()].Append(entry); } @@ -95,5 +95,6 @@ class UniqueDWARFASTTypeMap { typedef llvm::DenseMap collection; collection m_collection; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_UNIQUEDWARFASTTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 69cff0f35ae4ab2..9ca735e5820db57 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -86,6 +86,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; using namespace clang; using llvm::StringSwitch; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 0544de3cd33befb..b18a7b31cf7acc0 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -514,7 +514,7 @@ class TypeSystemClang : public TypeSystem { size_t bit_size); // TypeSystem methods - DWARFASTParser *GetDWARFParser() override; + lldb_plugin::dwarf::DWARFASTParser *GetDWARFParser() override; PDBASTParser *GetPDBParser() override; npdb::PdbAstBuilder *GetNativePDBParser() override; From lldb-commits at lists.llvm.org 
Tue Oct  3 13:06:47 2023
From: lldb-commits at lists.llvm.org (Stanislav Mekhanoshin via lldb-commits)
Date: Tue, 03 Oct 2023 13:06:47 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878)
In-Reply-To: 
Message-ID: <651c7457.170a0220.b4c9e.5e6d@mx.google.com>

rampitec wrote:

> I've just tested this on 10000 graphics shaders and it seems to make no difference at all. I tried gfx900 and gfx1100. Can anyone else from the graphics team confirm this?

It seems the most impact is on pre-gfx9 targets, very similar to https://github.com/llvm/llvm-project/pull/68028 and for the same reason: those targets had no no-carry add/sub instructions. The rest of the impact shows up when an add/sub is created late in the pipeline.

https://github.com/llvm/llvm-project/pull/67878

From lldb-commits at lists.llvm.org  Tue Oct  3 13:17:32 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Tue, 03 Oct 2023 13:17:32 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150)
In-Reply-To: 
Message-ID: <651c76dc.a70a0220.217e2.5602@mx.google.com>

https://github.com/walter-erquinigo updated https://github.com/llvm/llvm-project/pull/68150

>From 92dc652698d7de826e28ce1563fa804366bab5c0 Mon Sep 17 00:00:00 2001
From: walter erquinigo
Date: Mon, 2 Oct 2023 16:56:16 -0400
Subject: [PATCH] [LLDB][NFC] Create a namespace for the DWARF plugin

As a follow-up to https://github.com/llvm/llvm-project/pull/67851, I'm defining a new namespace `lldb_plugin::dwarf` for the classes in the Plugins/SymbolFile/DWARF folder. This change is strictly NFC and helped me export the necessary symbols for my out-of-tree language plugin. The only two classes that I didn't change are DWARFDataExtractor, because that's being explicitly exported as part of lldb_private in `lldb-forward.h`, and ClangDWARFASTParser, because that shouldn't be in the same namespace as the generic language-agnostic DWARF parser, but I'm okay with changing that. In any case, even if I didn't need this for my work, adding this namespace can be considered good practice.
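The mechanical shape of the change, condensed into a minimal sketch (distilled from the DWARFExpressionList.h hunk below; not a literal excerpt from the patch):

// Before: the plugin class was forward-declared at the top level, so any
// header could name it unqualified.
class DWARFUnit;

// After: plugin classes live in lldb_plugin::dwarf; core headers
// forward-declare them there and qualify every cross-namespace use.
namespace lldb_plugin::dwarf {
class DWARFUnit;
} // namespace lldb_plugin::dwarf

namespace lldb_private {
class DWARFExpressionList {
  // The compile unit this expression list belongs to, now spelled with
  // the plugin namespace.
  const lldb_plugin::dwarf::DWARFUnit *m_dwarf_cu = nullptr;
};
} // namespace lldb_private

Qualifying each use instead of adding using-directives to the headers keeps the plugin namespace from leaking into lldb_private; the `using namespace lldb_plugin::dwarf;` directives in the patch below appear only in .cpp files.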
--- .../include/lldb/Expression/DWARFExpression.h | 24 ++- .../lldb/Expression/DWARFExpressionList.h | 9 +- lldb/include/lldb/Symbol/TypeSystem.h | 8 +- lldb/source/Expression/DWARFExpression.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.h | 35 ++-- .../Plugins/SymbolFile/DWARF/DIERef.cpp | 1 + lldb/source/Plugins/SymbolFile/DWARF/DIERef.h | 7 +- .../SymbolFile/DWARF/DWARFASTParser.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 5 +- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 2 + .../SymbolFile/DWARF/DWARFASTParserClang.h | 158 ++++++++++-------- .../SymbolFile/DWARF/DWARFAttribute.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFAttribute.h | 3 + .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp | 5 +- .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.h | 2 + .../SymbolFile/DWARF/DWARFCompileUnit.cpp | 1 + .../SymbolFile/DWARF/DWARFCompileUnit.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFContext.h | 48 +++--- .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFDIE.h | 2 + .../SymbolFile/DWARF/DWARFDataExtractor.h | 2 +- .../SymbolFile/DWARF/DWARFDebugArangeSet.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugArangeSet.h | 2 + .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugAranges.h | 2 + .../SymbolFile/DWARF/DWARFDebugInfo.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h | 12 +- .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugInfoEntry.h | 2 + .../SymbolFile/DWARF/DWARFDebugMacro.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugMacro.h | 6 +- .../SymbolFile/DWARF/DWARFDebugRanges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugRanges.h | 6 +- .../SymbolFile/DWARF/DWARFDeclContext.cpp | 1 + .../SymbolFile/DWARF/DWARFDeclContext.h | 2 + .../Plugins/SymbolFile/DWARF/DWARFDefines.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDefines.h | 4 +- .../SymbolFile/DWARF/DWARFFormValue.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFFormValue.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFIndex.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFIndex.h | 43 ++--- .../SymbolFile/DWARF/DWARFTypeUnit.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 28 ++-- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 17 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 3 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 39 ++--- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/ManualDWARFIndex.h | 37 ++-- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/NameToDIE.h | 5 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 10 +- .../SymbolFile/DWARF/SymbolFileDWARF.h | 20 ++- .../DWARF/SymbolFileDWARFDebugMap.cpp | 3 + .../DWARF/SymbolFileDWARFDebugMap.h | 13 +- .../SymbolFile/DWARF/SymbolFileDWARFDwo.cpp | 1 + .../SymbolFile/DWARF/SymbolFileDWARFDwo.h | 9 +- .../SymbolFile/DWARF/UniqueDWARFASTType.cpp | 1 + .../SymbolFile/DWARF/UniqueDWARFASTType.h | 5 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 1 + .../TypeSystem/Clang/TypeSystemClang.h | 2 +- 63 files changed, 372 insertions(+), 250 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 5e03f539a272cac..4ed3881eb513c99 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf 
{ class DWARFUnit; +} // namespace lldb_plugin::dwarf namespace lldb_private { @@ -64,18 +66,21 @@ class DWARFExpression { /// \return /// The address specified by the operation, if the operation exists, or /// LLDB_INVALID_ADDRESS otherwise. - lldb::addr_t GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, - bool &error) const; + lldb::addr_t + GetLocation_DW_OP_addr(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + bool &error) const; - bool Update_DW_OP_addr(const DWARFUnit *dwarf_cu, lldb::addr_t file_addr); + bool Update_DW_OP_addr(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + lldb::addr_t file_addr); void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size, uint8_t addr_byte_size); - bool ContainsThreadLocalStorage(const DWARFUnit *dwarf_cu) const; + bool ContainsThreadLocalStorage( + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu) const; bool LinkThreadLocalStorage( - const DWARFUnit *dwarf_cu, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, std::function const &link_address_callback); @@ -128,15 +133,16 @@ class DWARFExpression { /// details of the failure are provided through it. static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, lldb::ModuleSP module_sp, const DataExtractor &opcodes, - const DWARFUnit *dwarf_cu, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_set, const Value *initial_value_ptr, const Value *object_address_ptr, Value &result, Status *error_ptr); - static bool ParseDWARFLocationList(const DWARFUnit *dwarf_cu, - const DataExtractor &data, - DWARFExpressionList *loc_list); + static bool + ParseDWARFLocationList(const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, + const DataExtractor &data, + DWARFExpressionList *loc_list); bool GetExpressionData(DataExtractor &data) const { data = m_data; diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h index c0939647056dcbf..c44ff71a7eef1bf 100644 --- a/lldb/include/lldb/Expression/DWARFExpressionList.h +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -13,7 +13,9 @@ #include "lldb/Utility/RangeMap.h" #include "lldb/lldb-private.h" +namespace lldb_plugin::dwarf { class DWARFUnit; +} // namespace lldb_plugin::dwarf namespace lldb_private { @@ -24,13 +26,14 @@ class DWARFExpressionList { public: DWARFExpressionList() = default; - DWARFExpressionList(lldb::ModuleSP module_sp, const DWARFUnit *dwarf_cu, + DWARFExpressionList(lldb::ModuleSP module_sp, + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu, lldb::addr_t func_file_addr) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu), m_func_file_addr(func_file_addr) {} DWARFExpressionList(lldb::ModuleSP module_sp, DWARFExpression expr, - const DWARFUnit *dwarf_cu) + const lldb_plugin::dwarf::DWARFUnit *dwarf_cu) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu) { AddExpression(0, LLDB_INVALID_ADDRESS, expr); } @@ -136,7 +139,7 @@ class DWARFExpressionList { /// The DWARF compile unit this expression belongs to. It is used to evaluate /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index, /// DW_OP_GNU_const_index) - const DWARFUnit *m_dwarf_cu = nullptr; + const lldb_plugin::dwarf::DWARFUnit *m_dwarf_cu = nullptr; // Function base file address. 
lldb::addr_t m_func_file_addr = LLDB_INVALID_ADDRESS; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index eb6e453e1aec0d0..4ef22a02d8c2d3f 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -28,8 +28,11 @@ #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/lldb-private.h" +namespace lldb_plugin::dwarf { class DWARFDIE; class DWARFASTParser; +} // namespace lldb_plugin::dwarf + class PDBASTParser; namespace lldb_private { @@ -93,7 +96,10 @@ class TypeSystem : public PluginInterface, /// removing all the TypeSystems from the TypeSystemMap. virtual void Finalize() {} - virtual DWARFASTParser *GetDWARFParser() { return nullptr; } + virtual lldb_plugin::dwarf::DWARFASTParser *GetDWARFParser() { + return nullptr; + } + virtual PDBASTParser *GetPDBParser() { return nullptr; } virtual npdb::PdbAstBuilder *GetNativePDBParser() { return nullptr; } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 93fcf0579be0b18..18528d78f1cf01b 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -45,6 +45,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; // DWARFExpression constructor DWARFExpression::DWARFExpression() : m_data() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 34fb98b5a9b690a..286fadfb2dcdaf6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; std::unique_ptr AppleDWARFIndex::Create( Module &module, DWARFDataExtractor apple_names, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h index 6b948e07989531e..ab7d4659e56cbe4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h @@ -12,15 +12,18 @@ #include "Plugins/SymbolFile/DWARF/DWARFIndex.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -namespace lldb_private { +namespace lldb_plugin::dwarf { class AppleDWARFIndex : public DWARFIndex { public: static std::unique_ptr - Create(Module &module, DWARFDataExtractor apple_names, - DWARFDataExtractor apple_namespaces, DWARFDataExtractor apple_types, - DWARFDataExtractor apple_objc, DWARFDataExtractor debug_str); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor apple_names, + lldb_private::DWARFDataExtractor apple_namespaces, + lldb_private::DWARFDataExtractor apple_types, + lldb_private::DWARFDataExtractor apple_objc, + lldb_private::DWARFDataExtractor debug_str); - AppleDWARFIndex(Module &module, + AppleDWARFIndex(lldb_private::Module &module, std::unique_ptr apple_names, std::unique_ptr apple_namespaces, std::unique_ptr apple_types, @@ -33,33 +36,33 @@ class AppleDWARFIndex : public DWARFIndex { void Preload() override {} void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void 
GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: std::unique_ptr m_apple_names_up; @@ -77,6 +80,6 @@ class AppleDWARFIndex : public DWARFIndex { std::optional search_for_tag = std::nullopt, std::optional search_for_qualhash = std::nullopt); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_APPLEDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp index 88a5e6027557b9b..d4c6b043fec359c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp @@ -14,6 +14,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, StringRef Style) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index b5a5cfe263f7804..939336dab2be6e6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -14,6 +14,7 @@ #include #include +namespace lldb_plugin::dwarf { /// Identifies a DWARF debug info entry within a given Module. 
It contains three /// "coordinates": /// - file_index: identifies the separate stand alone debug info file @@ -131,10 +132,12 @@ class DIERef { static_assert(sizeof(DIERef) == 8); typedef std::vector DIEArray; +} // namespace lldb_plugin::dwarf namespace llvm { -template<> struct format_provider { - static void format(const DIERef &ref, raw_ostream &OS, StringRef Style); +template <> struct format_provider { + static void format(const lldb_plugin::dwarf::DIERef &ref, raw_ostream &OS, + StringRef Style); }; } // namespace llvm diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index a68b7cd110eb719..06be740ad398d7d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -18,6 +18,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; std::optional DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 18825ae060b12fe..cf9f4bf8cdf30e4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -17,11 +17,13 @@ #include "lldb/lldb-enumerations.h" #include -class DWARFDIE; namespace lldb_private { class CompileUnit; class ExecutionContext; } + +namespace lldb_plugin::dwarf { +class DWARFDIE; class SymbolFileDWARF; class DWARFASTParser { @@ -65,5 +67,6 @@ class DWARFASTParser { static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..817ae92e962b3d6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -60,6 +60,8 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; + DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast) : m_ast(ast), m_die_to_decl_ctx(), m_decl_ctx_to_die() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 88bfc490e890744..8a8c8f48af16f97 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -31,12 +31,14 @@ namespace lldb_private { class CompileUnit; } +namespace lldb_plugin::dwarf { class DWARFDebugInfoEntry; class SymbolFileDWARF; +} // namespace lldb_plugin::dwarf struct ParsedDWARFTypeAttributes; -class DWARFASTParserClang : public DWARFASTParser { +class DWARFASTParserClang : public lldb_plugin::dwarf::DWARFASTParser { public: DWARFASTParserClang(lldb_private::TypeSystemClang &ast); @@ -44,32 +46,33 @@ class DWARFASTParserClang : public DWARFASTParser { // DWARFASTParser interface. 
lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, bool *type_is_new_ptr) override; - lldb_private::ConstString - ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; + lldb_private::ConstString ConstructDemangledNameFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + CompleteTypeFromDWARF(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; lldb_private::CompilerDecl - GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + GetDeclForUIDFromDWARF(const lldb_plugin::dwarf::DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext - GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( + const lldb_plugin::dwarf::DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -85,9 +88,9 @@ class DWARFASTParserClang : public DWARFASTParser { /// DWARFFormValue with the bit width of the given integer type. /// Returns an error if the value in the DWARFFormValue does not fit /// into the given integer type or the integer type isn't supported. - llvm::Expected - ExtractIntFromFormValue(const lldb_private::CompilerType &int_type, - const DWARFFormValue &form_value) const; + llvm::Expected ExtractIntFromFormValue( + const lldb_private::CompilerType &int_type, + const lldb_plugin::dwarf::DWARFFormValue &form_value) const; /// Returns the template parameters of a class DWARFDIE as a string. /// @@ -100,7 +103,7 @@ class DWARFASTParserClang : public DWARFASTParser { /// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. lldb_private::ConstString - GetDIEClassTemplateParams(const DWARFDIE &die) override; + GetDIEClassTemplateParams(const lldb_plugin::dwarf::DWARFDIE &die) override; protected: /// Protected typedefs and members. 
@@ -108,14 +111,17 @@ class DWARFASTParserClang : public DWARFASTParser { class DelayedAddObjCClassProperty; typedef std::vector DelayedPropertyList; - typedef llvm::DenseMap + typedef llvm::DenseMap DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; - typedef llvm::DenseMap DIEToModuleMap; - typedef llvm::DenseMap + typedef llvm::DenseMap DIEToDeclMap; lldb_private::TypeSystemClang &m_ast; @@ -126,11 +132,13 @@ class DWARFASTParserClang : public DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); + clang::DeclContext * + GetDeclContextForBlock(const lldb_plugin::dwarf::DWARFDIE &die); - clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); + clang::BlockDecl *ResolveBlockDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveNamespaceDIE(const lldb_plugin::dwarf::DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -141,31 +149,34 @@ class DWARFASTParserClang : public DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. - clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveImportedDeclarationDIE(const lldb_plugin::dwarf::DWARFDIE &die); - bool ParseTemplateDIE(const DWARFDIE &die, + bool ParseTemplateDIE(const lldb_plugin::dwarf::DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const DWARFDIE &parent_die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); + std::string + GetCPlusPlusQualifiedName(const lldb_plugin::dwarf::DWARFDIE &die); bool ParseChildMembers( - const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, + const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, + std::vector &member_function_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const DWARFDIE &parent_die, bool skip_artificial, - bool &is_static, bool &is_variadic, + const lldb_plugin::dwarf::DWARFDIE &parent_die, + bool skip_artificial, bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, @@ -173,33 +184,40 @@ class DWARFASTParserClang : public DWARFASTParser { size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, bool is_signed, uint32_t enumerator_byte_size, - const DWARFDIE &parent_die); + const lldb_plugin::dwarf::DWARFDIE &parent_die); /// Parse a structure, class, or union type DIE. 
lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb_private::Type *GetTypeForDIE(const DWARFDIE &die); + lldb_private::Type *GetTypeForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); + clang::Decl *GetClangDeclForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, - DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextContainingDIE(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_plugin::dwarf::DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID + GetOwningClangModule(const lldb_plugin::dwarf::DWARFDIE &die); - bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, - const DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes( + const lldb_plugin::dwarf::DWARFDIE &src_class_die, + const lldb_plugin::dwarf::DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetCachedClangDeclContextForDIE(const lldb_plugin::dwarf::DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, + const lldb_plugin::dwarf::DWARFDIE &die); - void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); + void LinkDeclToDIE(clang::Decl *decl, + const lldb_plugin::dwarf::DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. @@ -207,16 +225,17 @@ class DWARFASTParserClang : public DWARFASTParser { /// Returns \p type_sp. lldb::TypeSP UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); + const lldb_plugin::dwarf::DWARFDIE &die, + lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + lldb::ModuleSP GetModuleForType(const lldb_plugin::dwarf::DWARFDIE &die); private: struct FieldInfo { @@ -268,32 +287,37 @@ class DWARFASTParserClang : public DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. 
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, + void ParseObjCProperty(const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, DelayedPropertyList &delayed_properties); void - ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, + ParseSingleMember(const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, FieldInfo &last_field_info); - bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteRecordType(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteEnumType(const lldb_plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const lldb_plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const DWARFDIE &die, + lldb::TypeSP ParseArrayType(const lldb_plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, + lldb::TypeSP ParsePointerToMemberType(const lldb_plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. @@ -311,7 +335,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. 
void ParseInheritance( - const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -328,7 +353,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, + ParseRustVariantPart(lldb_plugin::dwarf::DWARFDIE &die, + const lldb_plugin::dwarf::DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -338,7 +364,7 @@ class DWARFASTParserClang : public DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + explicit ParsedDWARFTypeAttributes(const lldb_plugin::dwarf::DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -355,12 +381,12 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; lldb_private::ConstString name; lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; + lldb_plugin::dwarf::DWARFDIE object_pointer; + lldb_plugin::dwarf::DWARFFormValue abstract_origin; + lldb_plugin::dwarf::DWARFFormValue containing_type; + lldb_plugin::dwarf::DWARFFormValue signature; + lldb_plugin::dwarf::DWARFFormValue specification; + lldb_plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional byte_size; size_t calling_convention = llvm::dwarf::DW_CC_normal; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp index 00b56537ae2b5fe..48bd62012afb393 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp @@ -11,6 +11,7 @@ #include "DWARFDebugInfo.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; DWARFAttributes::DWARFAttributes() : m_infos() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index 90e12fa024936d6..d0f152608dde415 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; class DWARFAttribute { @@ -31,6 +32,7 @@ class DWARFAttribute { form = m_form; val = m_value; } + protected: dw_attr_t m_attr; dw_form_t m_form; @@ -72,5 +74,6 @@ class DWARFAttributes { typedef llvm::SmallVector collection; collection m_infos; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp index 37a917c3a7661eb..9ab7f0651d93d1f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp @@ 
-18,6 +18,7 @@ #include using namespace lldb_private; +using namespace lldb_plugin::dwarf; std::optional DWARFBaseDIE::GetDIERef() const { if (!IsValid()) @@ -35,7 +36,7 @@ dw_tag_t DWARFBaseDIE::Tag() const { } const char *DWARFBaseDIE::GetTagAsCString() const { - return lldb_private::DW_TAG_value_to_name(Tag()); + return DW_TAG_value_to_name(Tag()); } const char *DWARFBaseDIE::GetAttributeValueAsString(const dw_attr_t attr, @@ -120,6 +121,7 @@ DWARFAttributes DWARFBaseDIE::GetAttributes(Recurse recurse) const { return DWARFAttributes(); } +namespace lldb_plugin::dwarf { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return lhs.GetDIE() == rhs.GetDIE() && lhs.GetCU() == rhs.GetCU(); } @@ -127,6 +129,7 @@ bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return !(lhs == rhs); } +} // namespace lldb_plugin::dwarf const DWARFDataExtractor &DWARFBaseDIE::GetData() const { // Clients must check if this DIE is valid before calling this function. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h index 8bcf807ad163a60..f7948ae13716e91 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h @@ -15,6 +15,7 @@ #include "llvm/Support/Error.h" #include +namespace lldb_plugin::dwarf { class DIERef; class DWARFASTParser; class DWARFAttributes; @@ -124,5 +125,6 @@ class DWARFBaseDIE { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFBASEDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp index f839a59bf6c390b..6f18c3ddf605f1d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp @@ -16,6 +16,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void DWARFCompileUnit::Dump(Stream *s) const { s->Format( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index 65debac4c7d9265..574ede2acb5ceab 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -14,8 +14,9 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_plugin::dwarf { class DWARFCompileUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override; @@ -40,5 +41,6 @@ class DWARFCompileUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFCOMPILEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index f72dad88e157592..d7979a43e46e8a2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; static DWARFDataExtractor LoadSection(SectionList *section_list, SectionType section_type) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h index 7df776b5f514155..cabbb0bffbf6616 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h @@ -16,16 +16,16 @@ #include #include -namespace lldb_private { +namespace lldb_plugin::dwarf { class DWARFContext { private: - SectionList *m_main_section_list; - SectionList *m_dwo_section_list; + lldb_private::SectionList *m_main_section_list; + lldb_private::SectionList *m_dwo_section_list; mutable std::unique_ptr m_llvm_context; struct SectionData { llvm::once_flag flag; - DWARFDataExtractor data; + lldb_private::DWARFDataExtractor data; }; SectionData m_data_debug_abbrev; @@ -45,39 +45,39 @@ class DWARFContext { SectionData m_data_debug_tu_index; SectionData m_data_debug_types; - const DWARFDataExtractor & + const lldb_private::DWARFDataExtractor & LoadOrGetSection(std::optional main_section_type, std::optional dwo_section_type, SectionData &data); - const DWARFDataExtractor &getOrLoadCuIndexData(); - const DWARFDataExtractor &getOrLoadTuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadCuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadTuIndexData(); public: - explicit DWARFContext(SectionList *main_section_list, - SectionList *dwo_section_list) + explicit DWARFContext(lldb_private::SectionList *main_section_list, + lldb_private::SectionList *dwo_section_list) : m_main_section_list(main_section_list), m_dwo_section_list(dwo_section_list) {} - const DWARFDataExtractor &getOrLoadAbbrevData(); - const DWARFDataExtractor &getOrLoadAddrData(); - const DWARFDataExtractor &getOrLoadArangesData(); - const DWARFDataExtractor &getOrLoadDebugInfoData(); - const DWARFDataExtractor &getOrLoadLineData(); - const DWARFDataExtractor &getOrLoadLineStrData(); - const DWARFDataExtractor &getOrLoadLocData(); - const DWARFDataExtractor &getOrLoadLocListsData(); - const DWARFDataExtractor &getOrLoadMacroData(); - const DWARFDataExtractor &getOrLoadRangesData(); - const DWARFDataExtractor &getOrLoadRngListsData(); - const DWARFDataExtractor &getOrLoadStrData(); - const DWARFDataExtractor &getOrLoadStrOffsetsData(); - const DWARFDataExtractor &getOrLoadDebugTypesData(); + const lldb_private::DWARFDataExtractor &getOrLoadAbbrevData(); + const lldb_private::DWARFDataExtractor &getOrLoadAddrData(); + const lldb_private::DWARFDataExtractor &getOrLoadArangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugInfoData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadMacroData(); + const lldb_private::DWARFDataExtractor &getOrLoadRangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadRngListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrOffsetsData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugTypesData(); bool isDwo() { return m_dwo_section_list != nullptr; } llvm::DWARFContext &GetAsLLVM(); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index b31c5dcac91851d..62ef0eb356b0bc7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; namespace { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 031ea26ad405094..3e4a9ff4d446638 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/iterator_range.h" +namespace lldb_plugin::dwarf { class DWARFDIE : public DWARFBaseDIE { public: class child_iterator; @@ -126,5 +127,6 @@ class DWARFDIE::child_iterator return *this; } }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h index b9526b079c1e9da..41b8e9ad0217b69 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h @@ -33,6 +33,6 @@ class DWARFDataExtractor : public DataExtractor { llvm::DWARFDataExtractor GetAsLLVMDWARF() const; llvm::DataExtractor GetAsLLVM() const; }; -} +} // namespace lldb_private #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDATAEXTRACTOR_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp index 03cbfd28ae7413a..61f1f5f1aa8baec 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -13,6 +13,7 @@ #include using namespace lldb_private; +using namespace lldb_plugin::dwarf; DWARFDebugArangeSet::DWARFDebugArangeSet() : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h index 3c8633eaa3cce85..b1aaade00fbb126 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFDebugArangeSet { public: struct Header { @@ -62,5 +63,6 @@ class DWARFDebugArangeSet { Header m_header; DescriptorColl m_arange_descriptors; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index b38dd2b88c9d0b0..a5beafe593c2c1e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // Constructor DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h index 5ff37e400c88403..e0a852faa40b80c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h @@ -13,6 +13,7 @@ #include "lldb/Utility/RangeMap.h" #include "llvm/Support/Error.h" +namespace lldb_plugin::dwarf { class DWARFDebugAranges { protected: typedef lldb_private::RangeDataVector @@ -50,5 +51,6 @@ class DWARFDebugAranges { 
protected: RangeToDIE m_aranges; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp index 9a33d6338b87d3e..54609a1b77fc6cb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp @@ -27,10 +27,10 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; // Constructor -DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context) +DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context) : m_dwarf(dwarf), m_context(context), m_units(), m_cu_aranges_up() {} const DWARFDebugAranges &DWARFDebugInfo::GetCompileUnitAranges() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index c990ac9fbe58310..42ab0b55ba41d28 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -19,20 +19,17 @@ #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" -namespace lldb_private { +namespace lldb_plugin::dwarf { class DWARFContext; -} class DWARFDebugInfo { public: - typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, - DWARFUnit *cu, + typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, DWARFUnit *cu, DWARFDebugInfoEntry *die, const dw_offset_t next_offset, const uint32_t depth, void *userData); - explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context); + explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context); size_t GetNumUnits(); DWARFUnit *GetUnitAtIndex(size_t idx); @@ -58,7 +55,7 @@ class DWARFDebugInfo { typedef std::vector UnitColl; SymbolFileDWARF &m_dwarf; - lldb_private::DWARFContext &m_context; + DWARFContext &m_context; llvm::once_flag m_units_once_flag; UnitColl m_units; @@ -80,5 +77,6 @@ class DWARFDebugInfo { DWARFDebugInfo(const DWARFDebugInfo &) = delete; const DWARFDebugInfo &operator=(const DWARFDebugInfo &) = delete; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index a6ab83700904cb9..6405ecc20542f41 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -35,6 +35,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; extern int g_verbose; // Extract a debug info entry for a given DWARFUnit from the data diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index 29db44a16bb1281..cfcf91a582e44e9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -22,6 +22,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +namespace lldb_plugin::dwarf { class DWARFDeclContext; #define DIE_SIBLING_IDX_BITSIZE 31 @@ -190,5 +191,6 @@ class DWARFDebugInfoEntry { void GetAttributes(DWARFUnit *cu, DWARFAttributes &attrs, Recurse recurse, uint32_t curr_depth) const; }; +} // namespace lldb_plugin::dwarf #endif // 
LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFOENTRY_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp index 19c6448c4e74a06..cde5730d51e33fa 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp @@ -15,6 +15,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; DWARFDebugMacroHeader DWARFDebugMacroHeader::ParseHeader(const DWARFDataExtractor &debug_macro_data, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h index cbf762458331bcd..fd506b10b26b3da 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h @@ -17,11 +17,10 @@ #include "lldb/lldb-types.h" namespace lldb_private { - class DWARFDataExtractor; +} -} // namespace lldb_private - +namespace lldb_plugin::dwarf { class SymbolFileDWARF; class DWARFDebugMacroHeader { @@ -57,5 +56,6 @@ class DWARFDebugMacroEntry { SymbolFileDWARF *sym_file_dwarf, lldb_private::DebugMacrosSP &debug_macros_sp); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGMACRO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp index 0b5bb23a4981f89..e37d22f07337763 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp @@ -11,6 +11,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" using namespace lldb_private; +using namespace lldb_plugin::dwarf; DWARFDebugRanges::DWARFDebugRanges() : m_range_map() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h index 2e06cd5daf6f32d..78724a1d19223e6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h @@ -12,21 +12,21 @@ #include "lldb/Core/dwarf.h" #include +namespace lldb_plugin::dwarf { class DWARFUnit; -namespace lldb_private { class DWARFContext; -} class DWARFDebugRanges { public: DWARFDebugRanges(); - void Extract(lldb_private::DWARFContext &context); + void Extract(DWARFContext &context); DWARFRangeList FindRanges(const DWARFUnit *cu, dw_offset_t debug_ranges_offset) const; protected: std::map m_range_map; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGRANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp index 393de0038e651f3..a20dfef6bf89921 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp @@ -9,6 +9,7 @@ #include "DWARFDeclContext.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; const char *DWARFDeclContext::GetQualifiedName() const { if (m_qualified_name.empty()) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h index 13e3dfb70c0cc80..03ed4e898d566fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h @@ -16,6 +16,7 @@ #include #include +namespace lldb_plugin::dwarf { // DWARFDeclContext // // 
A class that represents a declaration context all the way down to a @@ -82,5 +83,6 @@ class DWARFDeclContext { collection m_entries; mutable std::string m_qualified_name; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDECLCONTEXT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp index 4e99a295ce50f7d..e30253e18d49cdf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp @@ -12,7 +12,7 @@ #include #include -namespace lldb_private { +namespace lldb_plugin::dwarf { const char *DW_TAG_value_to_name(uint32_t val) { static char invalid[100]; @@ -88,4 +88,4 @@ const char *DW_LNS_value_to_name(uint32_t val) { return llvmstr.data(); } -} // namespace lldb_private +} // namespace lldb_plugin::dwarf diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h index 2afdbb47381a9cd..dd41724149f9d90 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h @@ -12,7 +12,7 @@ #include "lldb/Core/dwarf.h" #include -namespace lldb_private { +namespace lldb_plugin::dwarf { typedef uint32_t DRC_class; // Holds DRC_* class bitfields @@ -30,6 +30,6 @@ const char *DW_LANG_value_to_name(uint32_t val); const char *DW_LNS_value_to_name(uint32_t val); -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEFINES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index 6ca17dcf47ff7ba..7c9f8073a3a01ae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -22,6 +22,7 @@ class DWARFUnit; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index 2a8843c1a0d45df..532afd94ef72c5f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFUnit; class SymbolFileDWARF; class DWARFDIE; @@ -84,7 +85,8 @@ class DWARFFormValue { // It may be different from compile unit where m_value refers to. 
const DWARFUnit *m_unit = nullptr; // Unit for this form dw_form_t m_form = dw_form_t(0); // Form for this value - ValueType m_value; // Contains all data for the form + ValueType m_value; // Contains all data for the form }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index 779b52481b856d8..791bd2d88696c26 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -17,6 +17,7 @@ using namespace lldb_private; using namespace lldb; +using namespace lldb_plugin::dwarf; DWARFIndex::~DWARFIndex() = default; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 13fe96dae2aa1d6..4fd952ddd8fc148 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -17,13 +17,13 @@ #include "lldb/Core/Module.h" #include "lldb/Target/Statistics.h" +namespace lldb_plugin::dwarf { class DWARFDeclContext; class DWARFDIE; -namespace lldb_private { class DWARFIndex { public: - DWARFIndex(Module &module) : m_module(module) {} + DWARFIndex(lldb_private::Module &module) : m_module(module) {} virtual ~DWARFIndex(); virtual void Preload() = 0; @@ -32,53 +32,56 @@ class DWARFIndex { /// (e.g., to only retrieve variables from a given context) should be done by /// the consumer. virtual void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) = 0; virtual void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) = 0; /// \a cu must be the skeleton unit if possible, not GetNonSkeletonUnit(). 
virtual void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) = 0; virtual void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) = 0; virtual void - GetCompleteObjCClass(ConstString class_name, bool must_be_implementation, + GetCompleteObjCClass(lldb_private::ConstString class_name, + bool must_be_implementation, llvm::function_ref callback) = 0; - virtual void GetTypes(ConstString name, + virtual void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) = 0; virtual void - GetNamespaces(ConstString name, + GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void - GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, + SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) = 0; virtual void - GetFunctions(const RegularExpression ®ex, + GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) = 0; - virtual void Dump(Stream &s) = 0; + virtual void Dump(lldb_private::Stream &s) = 0; - StatsDuration::Duration GetIndexTime() { return m_index_time; } + lldb_private::StatsDuration::Duration GetIndexTime() { return m_index_time; } protected: - Module &m_module; - StatsDuration m_index_time; + lldb_private::Module &m_module; + lldb_private::StatsDuration m_index_time; /// Helper function implementing common logic for processing function dies. If /// the function given by "ref" matches search criteria given by /// "parent_decl_ctx" and "name_type_mask", it is inserted into the "dies" /// vector. 
- bool ProcessFunctionDIE(const Module::LookupInfo &lookup_info, DIERef ref, - SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, - llvm::function_ref callback); + bool + ProcessFunctionDIE(const lldb_private::Module::LookupInfo &lookup_info, + DIERef ref, SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback); class DIERefCallbackImpl { public: @@ -102,6 +105,6 @@ class DWARFIndex { void ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const; }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp index 87af7177ca95ee9..cdacd8425bb8ed4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void DWARFTypeUnit::Dump(Stream *s) const { s->Format("{0:x16}: Type Unit: length = {1:x8}, version = {2:x4}, " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 5d939582a312e98..5a85a9dec002fa1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -14,8 +14,9 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_plugin::dwarf { class DWARFTypeUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {} @@ -37,5 +38,6 @@ class DWARFTypeUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFTYPEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index b51cf04c7b724e5..cd57f6ef5eb7051 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -28,6 +28,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; extern int g_verbose; @@ -201,8 +202,8 @@ DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs) rhs.m_cu = nullptr; } -DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=( - DWARFUnit::ScopedExtractDIEs &&rhs) { +DWARFUnit::ScopedExtractDIEs & +DWARFUnit::ScopedExtractDIEs::operator=(DWARFUnit::ScopedExtractDIEs &&rhs) { m_cu = rhs.m_cu; rhs.m_cu = nullptr; m_clear_dies = rhs.m_clear_dies; @@ -311,9 +312,9 @@ void DWARFUnit::ExtractDIEsRWLocked() { } if (!m_die_array.empty()) { - // The last die cannot have children (if it did, it wouldn't be the last one). - // This only makes a difference for malformed dwarf that does not have a - // terminating null die. + // The last die cannot have children (if it did, it wouldn't be the last + // one). This only makes a difference for malformed dwarf that does not have + // a terminating null die. 
m_die_array.back().SetHasChildren(false); if (m_first_die) { @@ -720,7 +721,7 @@ void DWARFUnit::ParseProducerInfo() { llvm::SmallVector matches; if (g_swiftlang_version_regex.Execute(producer, &matches)) { - m_producer_version.tryParse(matches[1]); + m_producer_version.tryParse(matches[1]); m_producer = eProducerSwift; } else if (producer.contains("clang")) { if (g_clang_version_regex.Execute(producer, &matches)) @@ -905,9 +906,10 @@ llvm::Error DWARFUnitHeader::ApplyIndexEntry( return llvm::Error::success(); } -llvm::Expected DWARFUnitHeader::extract( - const DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &context, lldb::offset_t *offset_ptr) { +llvm::Expected +DWARFUnitHeader::extract(const DWARFDataExtractor &data, + DIERef::Section section, DWARFContext &context, + lldb::offset_t *offset_ptr) { DWARFUnitHeader header; header.m_offset = *offset_ptr; header.m_length = data.GetDWARFInitialLength(offset_ptr); @@ -1086,22 +1088,20 @@ DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) { return ranges; } -llvm::Expected -DWARFUnit::FindRnglistFromIndex(uint32_t index) { +llvm::Expected DWARFUnit::FindRnglistFromIndex(uint32_t index) { llvm::Expected maybe_offset = GetRnglistOffset(index); if (!maybe_offset) return maybe_offset.takeError(); return FindRnglistFromOffset(*maybe_offset); } - bool DWARFUnit::HasAny(llvm::ArrayRef tags) { ExtractUnitDIEIfNeeded(); if (m_dwo) return m_dwo->HasAny(tags); - for (const auto &die: m_die_array) { - for (const auto tag: tags) { + for (const auto &die : m_die_array) { + for (const auto tag : tags) { if (tag == die.Tag()) return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 20871d805e77a87..ec8a6241660f6f1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -19,6 +19,7 @@ #include #include +namespace lldb_plugin::dwarf { class DWARFUnit; class DWARFCompileUnit; class NameToDIE; @@ -80,8 +81,7 @@ class DWARFUnitHeader { static llvm::Expected extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &dwarf_context, - lldb::offset_t *offset_ptr); + DWARFContext &dwarf_context, lldb::offset_t *offset_ptr); }; class DWARFUnit : public lldb_private::UserID { @@ -104,6 +104,7 @@ class DWARFUnit : public lldb_private::UserID { class ScopedExtractDIEs { DWARFUnit *m_cu; + public: bool m_clear_dies = false; ScopedExtractDIEs(DWARFUnit &cu); @@ -227,7 +228,9 @@ class DWARFUnit : public lldb_private::UserID { uint8_t GetUnitType() const { return m_header.GetUnitType(); } bool IsTypeUnit() const { return m_header.IsTypeUnit(); } /// Note that this check only works for DWARF5+. - bool IsSkeletonUnit() const { return GetUnitType() == llvm::dwarf::DW_UT_skeleton; } + bool IsSkeletonUnit() const { + return GetUnitType() == llvm::dwarf::DW_UT_skeleton; + } std::optional GetStringOffsetSectionItem(uint32_t index) const; @@ -272,7 +275,6 @@ class DWARFUnit : public lldb_private::UserID { /// True if any DIEs match any tag in \a tags, false otherwise. bool HasAny(llvm::ArrayRef tags); - /// Get the fission .dwo file specific error for this compile unit. /// /// The skeleton compile unit only can have a DWO error. 
Any other type @@ -351,9 +353,9 @@ class DWARFUnit : public lldb_private::UserID { lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate; std::optional m_comp_dir; std::optional m_file_spec; - std::optional m_addr_base; ///< Value of DW_AT_addr_base. - dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. - dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. + std::optional m_addr_base; ///< Value of DW_AT_addr_base. + dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. + dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. std::optional m_gnu_addr_base; std::optional m_gnu_ranges_base; @@ -390,5 +392,6 @@ class DWARFUnit : public lldb_private::UserID { DWARFUnit(const DWARFUnit &) = delete; const DWARFUnit &operator=(const DWARFUnit &) = delete; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index af2d6c554140bc0..748267e3c8bd010 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; llvm::Expected> DebugNamesDWARFIndex::Create(Module &module, DWARFDataExtractor debug_names, @@ -227,7 +228,7 @@ void DebugNamesDWARFIndex::GetNamespaces( ConstString name, llvm::function_ref callback) { for (const DebugNames::Entry &entry : m_debug_names_up->equal_range(name.GetStringRef())) { - dwarf::Tag entry_tag = entry.tag(); + lldb_private::dwarf::Tag entry_tag = entry.tag(); if (entry_tag == DW_TAG_namespace || entry_tag == DW_TAG_imported_declaration) { if (!ProcessEntry(entry, callback)) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index abbd700f1603feb..938b3e50ec0fba5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -17,50 +17,51 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include -namespace lldb_private { +namespace lldb_plugin::dwarf { class DebugNamesDWARFIndex : public DWARFIndex { public: static llvm::Expected> - Create(Module &module, DWARFDataExtractor debug_names, - DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor debug_names, + lldb_private::DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); void Preload() override { m_fallback.Preload(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override {} void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, 
llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: - DebugNamesDWARFIndex(Module &module, + DebugNamesDWARFIndex(lldb_private::Module &module, std::unique_ptr debug_names_up, - DWARFDataExtractor debug_names_data, - DWARFDataExtractor debug_str_data, + lldb_private::DWARFDataExtractor debug_names_data, + lldb_private::DWARFDataExtractor debug_str_data, SymbolFileDWARF &dwarf) : DWARFIndex(module), m_debug_info(dwarf.DebugInfo()), m_debug_names_data(debug_names_data), m_debug_str_data(debug_str_data), @@ -71,8 +72,8 @@ class DebugNamesDWARFIndex : public DWARFIndex { // LLVM DWARFDebugNames will hold a non-owning reference to this data, so keep // track of the ownership here. - DWARFDataExtractor m_debug_names_data; - DWARFDataExtractor m_debug_str_data; + lldb_private::DWARFDataExtractor m_debug_names_data; + lldb_private::DWARFDataExtractor m_debug_str_data; using DebugNames = llvm::DWARFDebugNames; std::unique_ptr m_debug_names_up; @@ -89,6 +90,6 @@ class DebugNamesDWARFIndex : public DWARFIndex { static llvm::DenseSet GetUnits(const DebugNames &debug_names); }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DEBUGNAMESDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 90f18c96afa230e..d4a4cbcc0f5326f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -28,6 +28,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; void ManualDWARFIndex::Index() { if (m_indexed) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index d95cf501face8e4..cd5527459b86d22 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -13,13 +13,13 @@ #include "Plugins/SymbolFile/DWARF/NameToDIE.h" #include "llvm/ADT/DenseSet.h" +namespace lldb_plugin::dwarf { class DWARFDebugInfo; class SymbolFileDWARFDwo; -namespace lldb_private { class ManualDWARFIndex : public DWARFIndex { public: - ManualDWARFIndex(Module &module, SymbolFileDWARF &dwarf, + ManualDWARFIndex(lldb_private::Module &module, SymbolFileDWARF &dwarf, llvm::DenseSet units_to_avoid = {}) : DWARFIndex(module), m_dwarf(&dwarf), m_units_to_avoid(std::move(units_to_avoid)) {} @@ -27,33 +27,33 @@ class ManualDWARFIndex : public DWARFIndex { void Preload() override { Index(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const 
RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &unit, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; // Make IndexSet public so we can unit test the encoding and decoding logic. struct IndexSet { @@ -65,8 +65,9 @@ class ManualDWARFIndex : public DWARFIndex { NameToDIE globals; NameToDIE types; NameToDIE namespaces; - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); - void Encode(DataEncoder &encoder) const; + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr); + void Encode(lldb_private::DataEncoder &encoder) const; bool operator==(const IndexSet &rhs) const { return function_basenames == rhs.function_basenames && function_fullnames == rhs.function_fullnames && @@ -94,8 +95,8 @@ class ManualDWARFIndex : public DWARFIndex { /// All strings in cache files are put into string tables for efficiency /// and cache file size reduction. Strings are stored as uint32_t string /// table offsets in the cache data. - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, - bool &signature_mismatch); + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr, bool &signature_mismatch); /// Encode this object into a data encoder object. /// @@ -112,7 +113,7 @@ class ManualDWARFIndex : public DWARFIndex { /// \return /// True if the symbol table's object file can generate a valid signature /// and all data for the symbol table was encoded, false otherwise. - bool Encode(DataEncoder &encoder) const; + bool Encode(lldb_private::DataEncoder &encoder) const; /// Get the cache key string for this symbol table. 
/// @@ -173,6 +174,6 @@ class ManualDWARFIndex : public DWARFIndex { IndexSet m_set; bool m_indexed = false; }; -} // namespace lldb_private +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 89e628f5eaf1c47..555c9ca2275a007 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -20,6 +20,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; void NameToDIE::Finalize() { m_map.Sort(std::less()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 61df1a628ab5913..4617abc4fca30ef 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -16,6 +16,7 @@ #include "lldb/Core/dwarf.h" #include "lldb/lldb-defines.h" +namespace lldb_plugin::dwarf { class DWARFUnit; class NameToDIE { @@ -45,8 +46,7 @@ class NameToDIE { void ForEach(std::function const - &callback) const; + const DIERef &die_ref)> const &callback) const; /// Decode a serialized version of this object from data. /// @@ -89,5 +89,6 @@ class NameToDIE { protected: lldb_private::UniqueCStringMap m_map; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e472074545a6f07..c6574881e0576c2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -97,6 +97,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; LLDB_PLUGIN_DEFINE(SymbolFileDWARF) @@ -136,9 +137,8 @@ static PluginProperties &GetGlobalPluginProperties() { } static const llvm::DWARFDebugLine::LineTable * -ParseLLVMLineTable(lldb_private::DWARFContext &context, - llvm::DWARFDebugLine &line, dw_offset_t line_offset, - dw_offset_t unit_offset) { +ParseLLVMLineTable(DWARFContext &context, llvm::DWARFDebugLine &line, + dw_offset_t line_offset, dw_offset_t unit_offset) { Log *log = GetLog(DWARFLog::DebugInfo); llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVMDWARF(); @@ -159,7 +159,7 @@ ParseLLVMLineTable(lldb_private::DWARFContext &context, return *line_table; } -static bool ParseLLVMLineTablePrologue(lldb_private::DWARFContext &context, +static bool ParseLLVMLineTablePrologue(DWARFContext &context, llvm::DWARFDebugLine::Prologue &prologue, dw_offset_t line_offset, dw_offset_t unit_offset) { @@ -2428,7 +2428,7 @@ bool SymbolFileDWARF::DIEInDeclContext(const CompilerDeclContext &decl_ctx, // ...But if we are only checking root decl contexts, confirm that the // 'die' is a top-level context. 
if (only_root_namespaces) - return die.GetParent().Tag() == dwarf::DW_TAG_compile_unit; + return die.GetParent().Tag() == llvm::dwarf::DW_TAG_compile_unit; return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 5aaf8bd270ef7b1..afe592caef60aed 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -38,6 +38,13 @@ #include "DWARFIndex.h" #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace llvm { +class DWARFDebugAbbrev; +} // namespace llvm + +namespace lldb_plugin::dwarf { // Forward Declarations for this DWARF plugin class DebugMapModule; class DWARFCompileUnit; @@ -54,10 +61,6 @@ class SymbolFileDWARFDwo; class SymbolFileDWARFDwp; class UserID; -namespace llvm { -class DWARFDebugAbbrev; -} - #define DIE_IS_BEING_PARSED ((lldb_private::Type *)1) class SymbolFileDWARF : public lldb_private::SymbolFileCommon { @@ -78,7 +81,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; // Static Functions static void Initialize(); @@ -285,7 +288,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { void DumpClangAST(lldb_private::Stream &s) override; - lldb_private::DWARFContext &GetDWARFContext() { return m_context; } + DWARFContext &GetDWARFContext() { return m_context; } const std::shared_ptr &GetDwpSymbolFile(); @@ -534,7 +537,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { llvm::once_flag m_dwp_symfile_once_flag; std::shared_ptr m_dwp_symfile; - lldb_private::DWARFContext m_context; + DWARFContext m_context; llvm::once_flag m_info_once_flag; std::unique_ptr m_info; @@ -547,7 +550,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { DebugMacrosMap m_debug_macros_map; ExternalTypeModuleMap m_external_type_modules; - std::unique_ptr m_index; + std::unique_ptr m_index; bool m_fetched_external_modules : 1; lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; @@ -578,5 +581,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// an index that identifies the .DWO or .o file. 
std::optional m_file_index; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARF_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index eadedd32e1a4aaf..afd3a9a5c27b08c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -42,6 +42,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; char SymbolFileDWARFDebugMap::ID; @@ -166,6 +167,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap( return file_range_map; } +namespace lldb_plugin::dwarf { class DebugMapModule : public Module { public: DebugMapModule(const ModuleSP &exe_module_sp, uint32_t cu_idx, @@ -222,6 +224,7 @@ class DebugMapModule : public Module { ModuleWP m_exe_module_wp; const uint32_t m_cu_idx; }; +} // namespace lldb_plugin::dwarf void SymbolFileDWARFDebugMap::Initialize() { PluginManager::RegisterPlugin(GetPluginNameStatic(), diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index 881fd4c45ff05a0..d9ad4f0ac077c29 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -20,6 +20,9 @@ #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace lldb_plugin::dwarf { class SymbolFileDWARF; class DWARFCompileUnit; class DWARFDebugAranges; @@ -161,7 +164,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { enum { kHaveInitializedOSOs = (1 << 0), kNumFlags }; friend class DebugMapModule; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; friend class DWARFCompileUnit; friend class SymbolFileDWARF; struct OSOInfo { @@ -296,9 +299,10 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { bool Supports_DW_AT_APPLE_objc_complete_type(SymbolFileDWARF *skip_dwarf_oso); - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation); + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation); UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; @@ -403,5 +407,6 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { size_t AddOSOARanges(SymbolFileDWARF *dwarf2Data, DWARFDebugAranges *debug_aranges); }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDEBUGMAP_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 78c3c19684e116d..3d92a9c384fb491 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -21,6 +21,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_plugin::dwarf; char SymbolFileDWARFDwo::ID; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index e98ea49d939baf0..22aba785670e228 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -12,6 +12,7 @@ #include 
"SymbolFileDWARF.h" #include +namespace lldb_plugin::dwarf { class SymbolFileDWARFDwo : public SymbolFileDWARF { /// LLVM RTTI support. static char ID; @@ -65,9 +66,10 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { lldb::TypeSP FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) override; - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation) override; + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation) override; SymbolFileDWARF &GetBaseSymbolFile() const { return m_base_symbol_file; } @@ -77,5 +79,6 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { SymbolFileDWARF &m_base_symbol_file; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDWO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 22a921cf61389bc..7378b2502dc9358 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -11,6 +11,7 @@ #include "lldb/Core/Declaration.h" using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, const lldb_private::Declaration &decl, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index 0947d1e581c5237..6f8ef5d9049e3e9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ -16,6 +16,7 @@ #include "DWARFDIE.h" #include "lldb/Core/Declaration.h" +namespace lldb_plugin::dwarf { class UniqueDWARFASTType { public: // Constructors and Destructors @@ -74,8 +75,7 @@ class UniqueDWARFASTTypeMap { ~UniqueDWARFASTTypeMap() = default; - void Insert(lldb_private::ConstString name, - const UniqueDWARFASTType &entry) { + void Insert(lldb_private::ConstString name, const UniqueDWARFASTType &entry) { m_collection[name.GetCString()].Append(entry); } @@ -95,5 +95,6 @@ class UniqueDWARFASTTypeMap { typedef llvm::DenseMap collection; collection m_collection; }; +} // namespace lldb_plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_UNIQUEDWARFASTTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 69cff0f35ae4ab2..9ca735e5820db57 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -86,6 +86,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_plugin::dwarf; using namespace clang; using llvm::StringSwitch; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 0544de3cd33befb..b18a7b31cf7acc0 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -514,7 +514,7 @@ class TypeSystemClang : public TypeSystem { size_t bit_size); // TypeSystem methods - DWARFASTParser *GetDWARFParser() override; + lldb_plugin::dwarf::DWARFASTParser *GetDWARFParser() override; PDBASTParser *GetPDBParser() override; npdb::PdbAstBuilder *GetNativePDBParser() override; From lldb-commits at lists.llvm.org 
Tue Oct 3 13:29:10 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Tue, 03 Oct 2023 13:29:10 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c7996.050a0220.6f9c7.57d6@mx.google.com> ================ @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); ---------------- kastiglione wrote: Why is this necessary? https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 13:29:47 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Tue, 03 Oct 2023 13:29:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c79bb.170a0220.e5950.60bb@mx.google.com> ================ @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); ---------------- kastiglione wrote: that's correct. https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 13:50:20 2023 From: lldb-commits at lists.llvm.org (Stanislav Mekhanoshin via lldb-commits) Date: Tue, 03 Oct 2023 13:50:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651c7e8c.620a0220.fc4d7.5664@mx.google.com> rampitec wrote: I have measured the compile-time performance impact by timing check-llvm-codegen-amdgpu on a release build: ``` before the patch: 11.06s add folding: 11.09s +0.2% remove folding from shrink: 11.02s -0.4% ``` In general the impact is smaller than the run-to-run variance; the numbers are the median of 5 runs. https://github.com/llvm/llvm-project/pull/67878 From lldb-commits at lists.llvm.org Tue Oct 3 13:52:24 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 13:52:24 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c7f08.620a0220.7d844.59d3@mx.google.com> ================ @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); ---------------- junior-jl wrote: I removed that line and the use of `target_sp`; hence, these lines also changed: ```cpp ModuleSP main_module_sp = GetExecutableModule(); BreakpointSP bp_sp = CreateBreakpoint(...) ``` It still works normally and passes the test. Thank you.
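[Editor's note: a minimal, self-contained sketch of the refactor being discussed, not the actual LLDB sources. Inside a member function, other members can be called directly through the implicit *this, so the extra `shared_from_this()` hop adds nothing; it also requires the object to already be owned by a `std::shared_ptr` (since C++17 it throws `std::bad_weak_ptr` otherwise). The names below mirror the patch, but the types and bodies are simplified stand-ins.]

```cpp
#include <memory>

struct Target : std::enable_shared_from_this<Target> {
  int GetExecutableModule() const { return 42; } // toy stand-in for ModuleSP

  // Before: a redundant shared_ptr to ourselves, used only to call a member.
  int CreateBreakpointAtUserEntryBefore() {
    std::shared_ptr<Target> target_sp = shared_from_this();
    return target_sp->GetExecutableModule();
  }

  // After: call the member directly; identical behavior, fewer constraints.
  int CreateBreakpointAtUserEntryAfter() { return GetExecutableModule(); }
};

int main() {
  // Shared ownership is required only by the "before" version.
  auto target = std::make_shared<Target>();
  return target->CreateBreakpointAtUserEntryBefore() ==
                 target->CreateBreakpointAtUserEntryAfter()
             ? 0
             : 1;
}
```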
https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 13:59:19 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Tue, 03 Oct 2023 13:59:19 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651c80a7.170a0220.3a882.0182@mx.google.com> ================ @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier ---------------- aartbik wrote: here and elsewhere, let's use compressed(nonordered) in the printing of the type, but not in the generated method names (replace ( ) with _ perhaps, so we keep valid C identifiers) https://github.com/llvm/llvm-project/pull/68130 From lldb-commits at lists.llvm.org Tue Oct 3 14:09:40 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 14:09:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c8314.050a0220.a082a.5883@mx.google.com> https://github.com/junior-jl updated https://github.com/llvm/llvm-project/pull/67019 >From 6de148adcdd1eedea7e23b4e267c6f42bb68bc45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Tue, 3 Oct 2023 15:28:45 -0300 Subject: [PATCH 1/2] [lldb] add stop-at-user-entry option to process launch --- lldb/include/lldb/Target/Language.h | 4 ++ lldb/include/lldb/Target/Target.h | 2 + .../Commands/CommandOptionsProcessLaunch.cpp | 13 +++++- lldb/source/Commands/Options.td | 4 ++ .../Language/CPlusPlus/CPlusPlusLanguage.h | 2 + .../Plugins/Language/ObjC/ObjCLanguage.h | 2 + .../ObjCPlusPlus/ObjCPlusPlusLanguage.h | 2 + lldb/source/Target/Target.cpp | 42 +++++++++++++++++++ .../command-process-launch-user-entry.test | 8 ++++ 9 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 lldb/test/Shell/Commands/command-process-launch-user-entry.test diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index a6b9ccaf31b3c42..d53089ba4a59974 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -160,6 +160,10 @@ class Language : public PluginInterface { virtual lldb::LanguageType GetLanguageType() const = 0; + // Implement this function to return the user-defined entry point name + // for the language + virtual llvm::StringRef GetUserEntryPointName() const { return {}; } + virtual bool IsTopLevelFunction(Function &function); virtual bool IsSourceFile(llvm::StringRef file_path) const = 0; diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index e9e531d0e12640a..82a343ee03fb516 100644 ---
a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -654,6 +654,8 @@ class Target : public std::enable_shared_from_this, lldb::BreakpointSP GetBreakpointByID(lldb::break_id_t break_id); + lldb::BreakpointSP CreateBreakpointAtUserEntry(); + // Use this to create a file and line breakpoint to a given module or all // module it is nullptr lldb::BreakpointSP CreateBreakpoint(const FileSpecList *containingModules, diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 85ad8ff5e07132c..3055e4ca45bd230 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -8,6 +8,7 @@ #include "CommandOptionsProcessLaunch.h" +#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" @@ -15,11 +16,13 @@ #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" - #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; using namespace lldb; @@ -38,7 +41,13 @@ Status CommandOptionsProcessLaunch::SetOptionValue( case 's': // Stop at program entry point launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; - + case 'm': // Stop at user entry point + { + TargetSP target_sp = + execution_context ? execution_context->GetTargetSP() : TargetSP(); + target_sp->CreateBreakpointAtUserEntry(); + break; + } case 'i': // STDIN for read only { FileAction action; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..dd4cf5c4dc043e7 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -675,6 +675,10 @@ let Command = "platform shell" in { let Command = "process launch" in { def process_launch_stop_at_entry : Option<"stop-at-entry", "s">, Desc<"Stop at the entry point of the program when launching a process.">; + def process_launch_stop_at_user_entry : Option<"stop-at-user-entry", "m">, + Desc<"Stop at the user entry point when launching a process. 
For C based " + "languages this will be the 'main' function, but this might differ for " + "other languages.">; def process_launch_disable_aslr : Option<"disable-aslr", "A">, Arg<"Boolean">, Desc<"Set whether to disable address space layout randomization when launching a process.">; def process_launch_plugin : Option<"plugin", "P">, Arg<"Plugin">, diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 7712a60b7795951..623d481bf117f48 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -103,6 +103,8 @@ class CPlusPlusLanguage : public Language { return lldb::eLanguageTypeC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + std::unique_ptr GetTypeScavenger() override; lldb::TypeCategoryImplSP GetFormatters() override; diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index bb8057846bb7c30..a50f4b036108d7a 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -127,6 +127,8 @@ class ObjCLanguage : public Language { return lldb::eLanguageTypeObjC; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + // Get all possible names for a method. Examples: // If method_name is "+[NSString(my_additions) myStringWithCString:]" // variant_names[0] => "+[NSString myStringWithCString:]" diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h index b7c71b5dbb1c991..1beab9348eb72e8 100644 --- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h @@ -27,6 +27,8 @@ class ObjCPlusPlusLanguage : public Language { return lldb::eLanguageTypeObjC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + llvm::StringRef GetNilReferenceSummaryString() override { return "nil"; } bool IsSourceFile(llvm::StringRef file_path) const override; diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index f197b1b1aa097c3..013d72bda6308be 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); + return lldb::BreakpointSP(); + } + std::string entryPointName = lang->GetUserEntryPointName().str(); + if (!entryPointName.empty()) + entryPointNamesSet.insert(entryPointName); + } + if (entryPointNamesSet.empty()) { + error.SetErrorString("No entry point name found\n"); + return lldb::BreakpointSP(); + } + BreakpointSP bp_sp = + target_sp->CreateBreakpoint(&shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + 
eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware + ); + if (!bp_sp) { + error.SetErrorString("Breakpoint creation failed.\n"); + return lldb::BreakpointSP(); + } + bp_sp->SetOneShot(true); + return bp_sp; +} + BreakpointSP Target::CreateSourceRegexBreakpoint( const FileSpecList *containingModules, const FileSpecList *source_file_spec_list, diff --git a/lldb/test/Shell/Commands/command-process-launch-user-entry.test b/lldb/test/Shell/Commands/command-process-launch-user-entry.test new file mode 100644 index 000000000000000..32ef710fe567439 --- /dev/null +++ b/lldb/test/Shell/Commands/command-process-launch-user-entry.test @@ -0,0 +1,8 @@ +# RUN: %clang_host -g %S/Inputs/main.c -o %t +# RUN: %lldb %t -s %s -o exit | FileCheck %s + +process launch -m +# CHECK-LABEL: process launch -m +# CHECK: Process {{.*}} stopped +# CHECK: stop reason = one-shot breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c \ No newline at end of file >From 8e0151842db6d261535f26e48a9194e9d90830fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Tue, 3 Oct 2023 18:08:39 -0300 Subject: [PATCH 2/2] refactor CreateBreakpointAtUserEntry method --- lldb/include/lldb/Target/Target.h | 2 +- .../Commands/CommandOptionsProcessLaunch.cpp | 10 +++----- lldb/source/Target/Target.cpp | 25 +++++++++---------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 82a343ee03fb516..8752b42a9518983 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -654,7 +654,7 @@ class Target : public std::enable_shared_from_this, lldb::BreakpointSP GetBreakpointByID(lldb::break_id_t break_id); - lldb::BreakpointSP CreateBreakpointAtUserEntry(); + lldb::BreakpointSP CreateBreakpointAtUserEntry(Status &error); // Use this to create a file and line breakpoint to a given module or all // module it is nullptr diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 3055e4ca45bd230..e3a9cc8aca2356e 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -37,17 +37,15 @@ Status CommandOptionsProcessLaunch::SetOptionValue( Status error; const int short_option = g_process_launch_options[option_idx].short_option; + TargetSP target_sp = + execution_context ? execution_context->GetTargetSP() : TargetSP(); switch (short_option) { case 's': // Stop at program entry point launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; case 'm': // Stop at user entry point - { - TargetSP target_sp = - execution_context ? execution_context->GetTargetSP() : TargetSP(); - target_sp->CreateBreakpointAtUserEntry(); + target_sp->CreateBreakpointAtUserEntry(error); break; - } case 'i': // STDIN for read only { FileAction action; @@ -98,8 +96,6 @@ Status CommandOptionsProcessLaunch::SetOptionValue( break; case 'a': { - TargetSP target_sp = - execution_context ? execution_context->GetTargetSP() : TargetSP(); PlatformSP platform_sp = target_sp ? 
target_sp->GetPlatform() : PlatformSP(); launch_info.GetArchitecture() = diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 013d72bda6308be..eaf9e51a7e56a90 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -335,10 +335,9 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } -lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { - TargetSP target_sp = shared_from_this(); - Status error; - ModuleSP main_module_sp = target_sp->GetExecutableModule(); +lldb::BreakpointSP +lldb_private::Target::CreateBreakpointAtUserEntry(Status &error) { + ModuleSP main_module_sp = GetExecutableModule(); FileSpecList shared_lib_filter; shared_lib_filter.Append(main_module_sp->GetFileSpec()); llvm::SetVector, @@ -359,15 +358,15 @@ lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { return lldb::BreakpointSP(); } BreakpointSP bp_sp = - target_sp->CreateBreakpoint(&shared_lib_filter, - nullptr, // containingSourceFiles - entryPointNamesSet.takeVector(), - eFunctionNameTypeFull, // func_name_type_mask - eLanguageTypeUnknown, // language - 0, // offset - eLazyBoolNo, // skip_prologue - false, // internal - false // hardware + CreateBreakpoint(&shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware ); if (!bp_sp) { error.SetErrorString("Breakpoint creation failed.\n"); From lldb-commits at lists.llvm.org Tue Oct 3 14:10:21 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 14:10:21 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c833d.170a0220.8d13.63a2@mx.google.com> ================ @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); ---------------- junior-jl wrote: Done ✅ https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 14:10:21 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 14:10:21 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c833d.170a0220.18bed.623e@mx.google.com> https://github.com/junior-jl edited https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 15:36:34 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 15:36:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][OpenMP] Added translation for `omp.teams` to LLVM IR (PR #68042) In-Reply-To: Message-ID: <651c9772.a70a0220.4d723.5c06@mx.google.com> https://github.com/shraiysh closed 
https://github.com/llvm/llvm-project/pull/68042 From lldb-commits at lists.llvm.org Tue Oct 3 15:37:45 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 15:37:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] 40c1f5b - Fix std::variant test failure on certain buildbot (#68139) Message-ID: <651c97b9.170a0220.32100.6751@mx.google.com> Author: jeffreytan81 Date: 2023-10-03T15:37:41-07:00 New Revision: 40c1f5b2fb88b17c6ed6911466002824d1880871 URL: https://github.com/llvm/llvm-project/commit/40c1f5b2fb88b17c6ed6911466002824d1880871 DIFF: https://github.com/llvm/llvm-project/commit/40c1f5b2fb88b17c6ed6911466002824d1880871.diff LOG: Fix std::variant test failure on certain buildbot (#68139) https://github.com/llvm/llvm-project/pull/68012 works on my CentOS Linux and MacBook machines but seems to fail on certain build bots. The error log reports a "No Value" check failure for `std::variant`, but it is not very actionable without a reproducer. To unblock the build bots, I am commenting out the "No Value" checks. Co-authored-by: jeffreytan81 Added: Modified: lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py Removed: ################################################################################ diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py index 7a433fea5feca23..96a9c8d30c45b00 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -60,6 +60,9 @@ def test_with_run_command(self): "frame variable v3", substrs=["v3 = Active Type = char {", "Value = 'A'", "}"], ) + """ + TODO: temporarily disable No Value tests as they seem to fail on ubuntu/debian + bots. Pending reproduce and investigation.
self.expect("frame variable v_no_value", substrs=["v_no_value = No Value"]) @@ -67,3 +70,4 @@ def test_with_run_command(self): "frame variable v_many_types_no_value", substrs=["v_many_types_no_value = No Value"], ) + """ From lldb-commits at lists.llvm.org Tue Oct 3 15:37:47 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Tue, 03 Oct 2023 15:37:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] Fix std::variant test failure on certain buildbot (PR #68139) In-Reply-To: Message-ID: <651c97bb.050a0220.6f9c7.5dd2@mx.google.com> https://github.com/medismailben closed https://github.com/llvm/llvm-project/pull/68139 From lldb-commits at lists.llvm.org Tue Oct 3 15:37:48 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Tue, 03 Oct 2023 15:37:48 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651c97bc.170a0220.6534e.67bc@mx.google.com> https://github.com/junior-jl edited https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Tue Oct 3 15:41:52 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 15:41:52 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651c98b0.630a0220.8c785.59a5@mx.google.com> shraiysh wrote: Ping for review! https://github.com/llvm/llvm-project/pull/67723 From lldb-commits at lists.llvm.org Tue Oct 3 15:45:01 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Tue, 03 Oct 2023 15:45:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651c996d.620a0220.1dfee.59da@mx.google.com> https://github.com/yinying-lisa-li updated https://github.com/llvm/llvm-project/pull/68130 >From 47b34bb327e1078678d3ba0c96ebce3fc89cf2ae Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 16:43:50 +0000 Subject: [PATCH 1/3] [mlir][sparse] Print new syntax Printing changes from #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> to map = (d0) -> (d0 : compressed). Level properties, ELL and slice are also supported. 
--- .../mlir/Dialect/SparseTensor/IR/Enums.h | 20 +-- .../SparseTensor/IR/SparseTensorDialect.cpp | 64 ++++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 +- .../SparseTensor/roundtrip_encoding.mlir | 32 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 8 +- .../SparseTensor/sparse_tensor_reshape.mlir | 2 +- .../python/dialects/sparse_tensor/dialect.py | 160 +++++++++--------- 7 files changed, 159 insertions(+), 135 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index bc351ec52c0946b..2920ef79f461c6a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -215,29 +215,29 @@ constexpr const char *toMLIRString(DimLevelType dlt) { case DimLevelType::Compressed: return "compressed"; case DimLevelType::CompressedNu: - return "compressed_nu"; + return "compressed(nonunique)"; case DimLevelType::CompressedNo: - return "compressed_no"; + return "compressed(nonordered)"; case DimLevelType::CompressedNuNo: - return "compressed_nu_no"; + return "compressed(nonunique, nonordered)"; case DimLevelType::Singleton: return "singleton"; case DimLevelType::SingletonNu: - return "singleton_nu"; + return "singleton(nonunique)"; case DimLevelType::SingletonNo: - return "singleton_no"; + return "singleton(nonordered)"; case DimLevelType::SingletonNuNo: - return "singleton_nu_no"; + return "singleton(nonunique, nonordered)"; case DimLevelType::LooseCompressed: return "loose_compressed"; case DimLevelType::LooseCompressedNu: - return "loose_compressed_nu"; + return "loose_compressed(nonunique)"; case DimLevelType::LooseCompressedNo: - return "loose_compressed_no"; + return "loose_compressed(nonordered)"; case DimLevelType::LooseCompressedNuNo: - return "loose_compressed_nu_no"; + return "loose_compressed(nonunique, nonordered)"; case DimLevelType::TwoOutOfFour: - return "compressed24"; + return "block2_4"; } return ""; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 3897e1b9ea3597c..4c8dccdda6c0c7c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -586,30 +586,56 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. - printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // Empty affine map indicates identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. + printer << "<{ map = "; + // Symbolic identifiers. + if (map.getNumSymbols() != 0) { + printer << '['; + for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) + printer << 's' << i << ", "; + if (map.getNumSymbols() >= 1) + printer << 's' << map.getNumSymbols() - 1; + printer << ']'; + } + // Dimension identifiers. 
+ printer << '('; + auto dimSlices = getDimSlices(); + if (!dimSlices.empty()) { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << " : " << dimSlices[i] << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1 << " : " + << dimSlices[map.getNumDims() - 1]; + } else { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1; + } + printer << ')'; + // Level format and properties. + printer << " -> ("; + for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { + map.getResult(i).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; + } + if (map.getNumResults() >= 1) { + auto lastIndex = map.getNumResults() - 1; + map.getResult(lastIndex).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); + } + printer << ')'; // Print remaining members only for non-default values. - if (!isIdentity()) - printer << ", dimToLvl = affine_map<" << getDimToLvl() << ">"; if (getPosWidth()) printer << ", posWidth = " << getPosWidth(); if (getCrdWidth()) printer << ", crdWidth = " << getCrdWidth(); - if (!getDimSlices().empty()) { - printer << ", dimSlices = [ "; - llvm::interleaveComma(getDimSlices(), printer, - [&](SparseTensorDimSliceAttr attr) { - // Calls SparseTensorDimSliceAttr::print directly to - // skip mnemonic. - attr.print(printer); - }); - printer << " ]"; - } - printer << " }>"; } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 69a9c274a861ce1..c3b16807a7c18a6 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private 
@"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index 39e3ef102423524..c4ef50bee01ea2c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // CHECK-LABEL: func private @sparse_1d_tensor( -// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }>>) +// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) // ----- @@ -13,7 +13,7 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map }> // CHECK-LABEL: func private @sparse_csr( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>) func.func private @sparse_csr(tensor) // ----- @@ -23,7 +23,7 @@ func.func private @sparse_csr(tensor) }> // CHECK-LABEL: func private @CSR_explicit( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : compressed) }>> func.func private @CSR_explicit(%arg0: tensor) { return } @@ -37,7 +37,7 @@ func.func private @CSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @sparse_csc( -// CHECK-SAME: tensor (d1, d0)> }>>) +// CHECK-SAME: tensor (d1 : dense, d0 : compressed) }>>) func.func private @sparse_csc(tensor) // ----- @@ -49,7 +49,7 @@ func.func private @sparse_csc(tensor) }> // CHECK-LABEL: func private @sparse_dcsc( -// CHECK-SAME: tensor (d1, d0)>, crdWidth = 64 }>>) +// CHECK-SAME: tensor (d1 : compressed, d0 : compressed), crdWidth = 64 }>>) func.func private @sparse_dcsc(tensor) // ----- @@ -59,7 +59,7 @@ func.func private @sparse_dcsc(tensor) }> // CHECK-LABEL: func private @sparse_coo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>) func.func private @sparse_coo(tensor) // ----- @@ -69,7 +69,7 @@ func.func private @sparse_coo(tensor) }> // CHECK-LABEL: func private @sparse_bcoo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>) func.func private @sparse_bcoo(tensor) // ----- @@ -79,7 +79,7 @@ func.func private @sparse_bcoo(tensor) }> // CHECK-LABEL: func private @sparse_sorted_coo( -// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed_nu", "singleton" ] }>>) +// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ 
map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) // ----- @@ -94,7 +94,7 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) }> // CHECK-LABEL: func private @sparse_bcsr( -// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed", "dense", "dense" ], dimToLvl = affine_map<(d0, d1) -> (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) @@ -105,7 +105,7 @@ func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) }> // CHECK-LABEL: func private @sparse_ell( -// CHECK-SAME: tensor (d0 * (s0 * 4), d0, d1)> }>> +// CHECK-SAME: tensor (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>> func.func private @sparse_ell(tensor) // ----- @@ -115,7 +115,7 @@ func.func private @sparse_ell(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -125,7 +125,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -138,7 +138,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_2_out_of_4( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : block2_4) }>> func.func private @sparse_2_out_of_4(tensor) // ----- @@ -153,7 +153,7 @@ func.func private @sparse_2_out_of_4(tensor) }> // CHECK-LABEL: func private @BCSR( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR(%arg0: tensor) { return } @@ -174,7 +174,7 @@ func.func private @BCSR(%arg0: tensor) { }> // CHECK-LABEL: func private @BCSR_explicit( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR_explicit(%arg0: tensor) { return } @@ -190,7 +190,7 @@ func.func private @BCSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0, d1 floordiv 4, d1 mod 4)> }>> +// CHECK-SAME: tensor (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4) }>> func.func private @NV_24(%arg0: tensor) { return } diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 7f8edac15302616..3a2376f75654af9 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -16,7 +16,7 @@ // CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -53,7 +53,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> 
tensor<10x10x // CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -99,7 +99,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-ROUND: return %[[E]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -142,7 +142,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-ROUND: return %[[C]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir index 9368cc71c5faa42..e0111c89df65a2d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir @@ -4,7 +4,7 @@ #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK: func.func @sparse_reshape( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C25:.*]] = arith.constant 25 : index // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index e1048edce184a51..6d15363fb17118d 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,95 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. 
- # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. + # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get( - casted.lvl_types, casted.dim_to_lvl, 8, 32 - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def 
testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" - ) + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + ) + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding >From 2be69066192995ff171e08a54f7c7fdd3e35ab44 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 18:39:17 +0000 Subject: [PATCH 2/3] format --- .../python/dialects/sparse_tensor/dialect.py | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index 6d15363fb17118d..d80b878323377a4 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,93 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. 
- # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. + # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - 
encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" + ) ) - ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding >From c7ee65a28b79ffdd45d068638775d5bcf7c20c29 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 22:44:39 +0000 Subject: [PATCH 3/3] update function name --- .../Transforms/SparseTensorCodegen.cpp | 20 +++++++++++++++++-- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 ++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index f02276fba0d526b..a470de8a72bed16 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -472,8 +472,11 @@ class SparseInsertGenerator llvm::raw_svector_ostream nameOstream(nameBuffer); nameOstream << kInsertFuncNamePrefix; const Level lvlRank = stt.getLvlRank(); - for (Level l = 0; l < lvlRank; l++) - nameOstream << toMLIRString(stt.getLvlType(l)) << "_"; + for (Level l = 0; l < lvlRank; l++) { + std::string lvlType = toMLIRString(stt.getLvlType(l)); + replaceWithUnderscore(lvlType); + nameOstream << lvlType << "_"; + } // Static dim sizes are used in the generated code while dynamic sizes are // loaded from the dimSizes buffer. This is the reason for adding the shape // to the function name. @@ -489,6 +492,19 @@ class SparseInsertGenerator private: TensorType rtp; + void replaceWithUnderscore(std::string &lvlType) { + for (auto it = lvlType.begin(); it != lvlType.end();) { + if (*it == '(') { + *it = '_'; + } else if (*it == ')' || *it == ' ') { + it = lvlType.erase(it); + continue; + } else if (*it == ',') { + *it = '_'; + } + it++; + } + } }; /// Generations insertion finalization code. 
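To make the effect of replaceWithUnderscore concrete, here is a trace of the character loop above on one of the new level-type spellings; it matches the updated codegen.mlir expectations that follow:

  // toMLIRString(DimLevelType::CompressedNu) yields:  "compressed(nonunique)"
  // replaceWithUnderscore rewrites it to:             "compressed_nonunique"
  // so the generated insert helper is named:          @_insert_compressed_nonunique_singleton_5_6_f64_0_0
  // instead of the string-quoted symbol from PATCH 1: @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"

The sanitization keeps the generated helper names valid as bare @-identifiers, so they no longer need quoting.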
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index c3b16807a7c18a6..6ba4769402d15cb 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> From lldb-commits at lists.llvm.org Tue Oct 3 15:54:18 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Tue, 03 Oct 2023 15:54:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651c9b9a.170a0220.8d13.682e@mx.google.com> ================ @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = 
%[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier ---------------- yinying-lisa-li wrote: Done! https://github.com/llvm/llvm-project/pull/68130 From lldb-commits at lists.llvm.org Tue Oct 3 16:02:17 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Tue, 03 Oct 2023 16:02:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651c9d79.620a0220.2eb2.632f@mx.google.com> https://github.com/yinying-lisa-li updated https://github.com/llvm/llvm-project/pull/68130 >From 47b34bb327e1078678d3ba0c96ebce3fc89cf2ae Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 16:43:50 +0000 Subject: [PATCH 1/4] [mlir][sparse] Print new syntax Printing changes from #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> to map = (d0) -> (d0 : compressed). Level properties, ELL and slice are also supported. --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 20 +-- .../SparseTensor/IR/SparseTensorDialect.cpp | 64 ++++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 +- .../SparseTensor/roundtrip_encoding.mlir | 32 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 8 +- .../SparseTensor/sparse_tensor_reshape.mlir | 2 +- .../python/dialects/sparse_tensor/dialect.py | 160 +++++++++--------- 7 files changed, 159 insertions(+), 135 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index bc351ec52c0946b..2920ef79f461c6a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -215,29 +215,29 @@ constexpr const char *toMLIRString(DimLevelType dlt) { case DimLevelType::Compressed: return "compressed"; case DimLevelType::CompressedNu: - return "compressed_nu"; + return "compressed(nonunique)"; case DimLevelType::CompressedNo: - return "compressed_no"; + return "compressed(nonordered)"; case DimLevelType::CompressedNuNo: - return "compressed_nu_no"; + return "compressed(nonunique, nonordered)"; case DimLevelType::Singleton: return "singleton"; case DimLevelType::SingletonNu: - return "singleton_nu"; + return "singleton(nonunique)"; case DimLevelType::SingletonNo: - return "singleton_no"; + return "singleton(nonordered)"; case DimLevelType::SingletonNuNo: - return "singleton_nu_no"; + return "singleton(nonunique, nonordered)"; case DimLevelType::LooseCompressed: return "loose_compressed"; case DimLevelType::LooseCompressedNu: - return "loose_compressed_nu"; + return "loose_compressed(nonunique)"; case DimLevelType::LooseCompressedNo: - return "loose_compressed_no"; + return "loose_compressed(nonordered)"; case DimLevelType::LooseCompressedNuNo: - return "loose_compressed_nu_no"; + return "loose_compressed(nonunique, nonordered)"; case DimLevelType::TwoOutOfFour: - return "compressed24"; + return "block2_4"; } return ""; } diff --git 
a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 3897e1b9ea3597c..4c8dccdda6c0c7c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -586,30 +586,56 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. - printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // Empty affine map indicates identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. + printer << "<{ map = "; + // Symbolic identifiers. + if (map.getNumSymbols() != 0) { + printer << '['; + for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) + printer << 's' << i << ", "; + if (map.getNumSymbols() >= 1) + printer << 's' << map.getNumSymbols() - 1; + printer << ']'; + } + // Dimension identifiers. + printer << '('; + auto dimSlices = getDimSlices(); + if (!dimSlices.empty()) { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << " : " << dimSlices[i] << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1 << " : " + << dimSlices[map.getNumDims() - 1]; + } else { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1; + } + printer << ')'; + // Level format and properties. + printer << " -> ("; + for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { + map.getResult(i).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; + } + if (map.getNumResults() >= 1) { + auto lastIndex = map.getNumResults() - 1; + map.getResult(lastIndex).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); + } + printer << ')'; // Print remaining members only for non-default values. - if (!isIdentity()) - printer << ", dimToLvl = affine_map<" << getDimToLvl() << ">"; if (getPosWidth()) printer << ", posWidth = " << getPosWidth(); if (getCrdWidth()) printer << ", crdWidth = " << getCrdWidth(); - if (!getDimSlices().empty()) { - printer << ", dimSlices = [ "; - llvm::interleaveComma(getDimSlices(), printer, - [&](SparseTensorDimSliceAttr attr) { - // Calls SparseTensorDimSliceAttr::print directly to - // skip mnemonic. 
- attr.print(printer); - }); - printer << " ]"; - } - printer << " }>"; } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 69a9c274a861ce1..c3b16807a7c18a6 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index 39e3ef102423524..c4ef50bee01ea2c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // CHECK-LABEL: func private @sparse_1d_tensor( -// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] 
}>>) +// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) // ----- @@ -13,7 +13,7 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map }> // CHECK-LABEL: func private @sparse_csr( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>) func.func private @sparse_csr(tensor) // ----- @@ -23,7 +23,7 @@ func.func private @sparse_csr(tensor) }> // CHECK-LABEL: func private @CSR_explicit( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : compressed) }>> func.func private @CSR_explicit(%arg0: tensor) { return } @@ -37,7 +37,7 @@ func.func private @CSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @sparse_csc( -// CHECK-SAME: tensor (d1, d0)> }>>) +// CHECK-SAME: tensor (d1 : dense, d0 : compressed) }>>) func.func private @sparse_csc(tensor) // ----- @@ -49,7 +49,7 @@ func.func private @sparse_csc(tensor) }> // CHECK-LABEL: func private @sparse_dcsc( -// CHECK-SAME: tensor (d1, d0)>, crdWidth = 64 }>>) +// CHECK-SAME: tensor (d1 : compressed, d0 : compressed), crdWidth = 64 }>>) func.func private @sparse_dcsc(tensor) // ----- @@ -59,7 +59,7 @@ func.func private @sparse_dcsc(tensor) }> // CHECK-LABEL: func private @sparse_coo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>) func.func private @sparse_coo(tensor) // ----- @@ -69,7 +69,7 @@ func.func private @sparse_coo(tensor) }> // CHECK-LABEL: func private @sparse_bcoo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>) func.func private @sparse_bcoo(tensor) // ----- @@ -79,7 +79,7 @@ func.func private @sparse_bcoo(tensor) }> // CHECK-LABEL: func private @sparse_sorted_coo( -// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed_nu", "singleton" ] }>>) +// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) // ----- @@ -94,7 +94,7 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) }> // CHECK-LABEL: func private @sparse_bcsr( -// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed", "dense", "dense" ], dimToLvl = affine_map<(d0, d1) -> (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) @@ -105,7 +105,7 @@ func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) }> // CHECK-LABEL: func private @sparse_ell( -// CHECK-SAME: tensor (d0 * (s0 * 4), d0, d1)> }>> +// CHECK-SAME: tensor (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>> func.func private @sparse_ell(tensor) // ----- @@ -115,7 +115,7 @@ func.func private @sparse_ell(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -125,7 +125,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, 
d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -138,7 +138,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_2_out_of_4( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : block2_4) }>> func.func private @sparse_2_out_of_4(tensor) // ----- @@ -153,7 +153,7 @@ func.func private @sparse_2_out_of_4(tensor) }> // CHECK-LABEL: func private @BCSR( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR(%arg0: tensor) { return } @@ -174,7 +174,7 @@ func.func private @BCSR(%arg0: tensor) { }> // CHECK-LABEL: func private @BCSR_explicit( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR_explicit(%arg0: tensor) { return } @@ -190,7 +190,7 @@ func.func private @BCSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0, d1 floordiv 4, d1 mod 4)> }>> +// CHECK-SAME: tensor (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4) }>> func.func private @NV_24(%arg0: tensor) { return } diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 7f8edac15302616..3a2376f75654af9 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -16,7 +16,7 @@ // CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -53,7 +53,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -99,7 +99,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-ROUND: return %[[E]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -142,7 +142,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-ROUND: return %[[C]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir index 9368cc71c5faa42..e0111c89df65a2d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir +++ 
b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir @@ -4,7 +4,7 @@ #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK: func.func @sparse_reshape( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C25:.*]] = arith.constant 25 : index // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index e1048edce184a51..6d15363fb17118d 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,95 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. - # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. 
+ # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get( - casted.lvl_types, casted.dim_to_lvl, 8, 32 - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" - ) + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + ) + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == 
encoding >From 2be69066192995ff171e08a54f7c7fdd3e35ab44 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 18:39:17 +0000 Subject: [PATCH 2/4] format --- .../python/dialects/sparse_tensor/dialect.py | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index 6d15363fb17118d..d80b878323377a4 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,93 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. - # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. 
+ # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" + ) ) - ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding >From c7ee65a28b79ffdd45d068638775d5bcf7c20c29 Mon Sep 17 
00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 22:44:39 +0000 Subject: [PATCH 3/4] update function name --- .../Transforms/SparseTensorCodegen.cpp | 20 +++++++++++++++++-- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 ++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index f02276fba0d526b..a470de8a72bed16 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -472,8 +472,11 @@ class SparseInsertGenerator llvm::raw_svector_ostream nameOstream(nameBuffer); nameOstream << kInsertFuncNamePrefix; const Level lvlRank = stt.getLvlRank(); - for (Level l = 0; l < lvlRank; l++) - nameOstream << toMLIRString(stt.getLvlType(l)) << "_"; + for (Level l = 0; l < lvlRank; l++) { + std::string lvlType = toMLIRString(stt.getLvlType(l)); + replaceWithUnderscore(lvlType); + nameOstream << lvlType << "_"; + } // Static dim sizes are used in the generated code while dynamic sizes are // loaded from the dimSizes buffer. This is the reason for adding the shape // to the function name. @@ -489,6 +492,19 @@ class SparseInsertGenerator private: TensorType rtp; + void replaceWithUnderscore(std::string &lvlType) { + for (auto it = lvlType.begin(); it != lvlType.end();) { + if (*it == '(') { + *it = '_'; + } else if (*it == ')' || *it == ' ') { + it = lvlType.erase(it); + continue; + } else if (*it == ',') { + *it = '_'; + } + it++; + } + } }; /// Generations insertion finalization code. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index c3b16807a7c18a6..6ba4769402d15cb 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : 
tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 2329e0df37e9ae6d36f57de8113028f43f162ddc Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 23:01:57 +0000 Subject: [PATCH 4/4] make replace function more compact --- .../Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index a470de8a72bed16..0d076f6ef9d10ab 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -494,13 +494,11 @@ class SparseInsertGenerator TensorType rtp; void replaceWithUnderscore(std::string &lvlType) { for (auto it = lvlType.begin(); it != lvlType.end();) { - if (*it == '(') { + if (*it == '(' || *it == ',') { *it = '_'; } else if (*it == ')' || *it == ' ') { it = lvlType.erase(it); continue; - } else if (*it == ',') { - *it = '_'; } it++; } From lldb-commits at lists.llvm.org Tue Oct 3 16:11:46 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Tue, 03 Oct 2023 16:11:46 -0700 (PDT) Subject: [Lldb-commits] [lldb] 91223c9 - [lldb] Unbreak debian build after dd76375c8009 Message-ID: <651c9fb2.170a0220.17c66.6b1d@mx.google.com> Author: Alex Langford Date: 2023-10-03T16:11:02-07:00 New Revision: 91223c92f7f7e1a6c074a28de6b5206c22eec116 URL: https://github.com/llvm/llvm-project/commit/91223c92f7f7e1a6c074a28de6b5206c22eec116 DIFF: https://github.com/llvm/llvm-project/commit/91223c92f7f7e1a6c074a28de6b5206c22eec116.diff LOG: [lldb] Unbreak debian build after dd76375c8009 >From the lldb-x86_64-debian buildbot: ``` /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp:984:16: error: call to deleted constructor of 'llvm::Error' return err; ^~~ /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/llvm/include/llvm/Support/Error.h:189:3: note: 'Error' has been explicitly marked deleted here Error(const Error &Other) = delete; ^ /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/llvm/include/llvm/Support/Error.h:496:18: note: passing argument to parameter 'Err' here Expected(Error Err) ^ 1 error generated. 
``` Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index b51cf04c7b724e5..a09c68087c47659 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -981,7 +981,7 @@ DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid, entry = index.getFromOffset(expected_header->GetOffset()); if (entry) if (llvm::Error err = expected_header->ApplyIndexEntry(entry)) - return err; + return std::move(err); } const llvm::DWARFDebugAbbrev *abbr = dwarf.DebugAbbrev(); From lldb-commits at lists.llvm.org Tue Oct 3 16:21:55 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Tue, 03 Oct 2023 16:21:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] 40653b6 - [lldb] Fix --persistent-result description (#68128) Message-ID: <651ca213.170a0220.26d1a.6a74@mx.google.com> Author: Dave Lee Date: 2023-10-03T16:21:52-07:00 New Revision: 40653b6d6682777f2874eddfb62876cfe5091a6d URL: https://github.com/llvm/llvm-project/commit/40653b6d6682777f2874eddfb62876cfe5091a6d DIFF: https://github.com/llvm/llvm-project/commit/40653b6d6682777f2874eddfb62876cfe5091a6d.diff LOG: [lldb] Fix --persistent-result description (#68128) The default is not static, it depends on context. For `expression`, the default is true, but for `dwim-print`, the default is false. rdar://116320377 Added: Modified: lldb/source/Commands/Options.td Removed: ################################################################################ diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..cfdeaabfa007743 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -390,7 +390,7 @@ let Command = "expression" in { Arg<"Boolean">, Desc<"Persist expression result in a variable for subsequent use. " "Expression results will be labeled with $-prefixed variables, e.g. $0, " - "$1, etc. Defaults to true.">; + "$1, etc.">; } let Command = "frame diag" in { From lldb-commits at lists.llvm.org Tue Oct 3 16:21:59 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Tue, 03 Oct 2023 16:21:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix --persistent-result description (PR #68128) In-Reply-To: Message-ID: <651ca217.170a0220.f3d68.6cd2@mx.google.com> https://github.com/kastiglione closed https://github.com/llvm/llvm-project/pull/68128 From lldb-commits at lists.llvm.org Tue Oct 3 16:23:45 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Tue, 03 Oct 2023 16:23:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651ca281.170a0220.2ff3d.6fd1@mx.google.com> ================ @@ -586,30 +586,56 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. 
- printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // Empty affine map indicates identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. ---------------- aartbik wrote: I would remove this. This is diverged sufficiently to no longer refer to printAffineMap https://github.com/llvm/llvm-project/pull/68130 From lldb-commits at lists.llvm.org Tue Oct 3 16:23:49 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Tue, 03 Oct 2023 16:23:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651ca285.170a0220.c51b8.6e2a@mx.google.com> ================ @@ -472,8 +472,11 @@ class SparseInsertGenerator llvm::raw_svector_ostream nameOstream(nameBuffer); nameOstream << kInsertFuncNamePrefix; const Level lvlRank = stt.getLvlRank(); - for (Level l = 0; l < lvlRank; l++) - nameOstream << toMLIRString(stt.getLvlType(l)) << "_"; + for (Level l = 0; l < lvlRank; l++) { + std::string lvlType = toMLIRString(stt.getLvlType(l)); + replaceWithUnderscore(lvlType); ---------------- aartbik wrote: std::string::replace ? https://github.com/llvm/llvm-project/pull/68130 From lldb-commits at lists.llvm.org Tue Oct 3 16:24:44 2023 From: lldb-commits at lists.llvm.org (Peiming Liu via lldb-commits) Date: Tue, 03 Oct 2023 16:24:44 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651ca2bc.170a0220.89191.06f3@mx.google.com> ================ @@ -586,30 +586,56 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. - printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // Empty affine map indicates identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. + printer << "<{ map = "; + // Symbolic identifiers. + if (map.getNumSymbols() != 0) { + printer << '['; + for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) + printer << 's' << i << ", "; + if (map.getNumSymbols() >= 1) + printer << 's' << map.getNumSymbols() - 1; + printer << ']'; + } + // Dimension identifiers. + printer << '('; + auto dimSlices = getDimSlices(); + if (!dimSlices.empty()) { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << " : " << dimSlices[i] << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1 << " : " + << dimSlices[map.getNumDims() - 1]; + } else { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1; + } + printer << ')'; + // Level format and properties. 
+ printer << " -> ("; + for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { + map.getResult(i).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; + } + if (map.getNumResults() >= 1) { + auto lastIndex = map.getNumResults() - 1; + map.getResult(lastIndex).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); + } + printer << ')'; ---------------- PeimingLiu wrote: I would suggest you break these into smaller functions. https://github.com/llvm/llvm-project/pull/68130 From lldb-commits at lists.llvm.org Tue Oct 3 17:59:13 2023 From: lldb-commits at lists.llvm.org (Konstantin Varlamov via lldb-commits) Date: Tue, 03 Oct 2023 17:59:13 -0700 (PDT) Subject: [Lldb-commits] [lldb] [libc++] Implement ranges::contains (PR #65148) In-Reply-To: Message-ID: <651cb8e1.170a0220.14cdc.6e68@mx.google.com> ================ @@ -0,0 +1,61 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_CONTAINS_H +#define _LIBCPP___ALGORITHM_RANGES_CONTAINS_H + +#include <__algorithm/in_in_result.h> ---------------- var-const wrote: Sorry, what I mean is that a good and easy way to see if we're testing every constraint is to simply remove or comment out a constraint, run the tests and see if any tests fail. If we're testing everything properly, at least one test will fail -- conversely, if everything passes, that means a lack of test coverage. So what I'm suggesting is to temporarily remove the `projected` constraint in your local copy, run the tests and see if there are any failures. If there are no failures, please see if it's possible to add a test that checks we are using the `projected` concept there as required by the Standard. Happy to help if this explanation isn't clear! 
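For instance, a constraint-coverage check along the following lines only stays satisfiable while the `projected` requirement is present (a minimal sketch assuming a C++23 library that ships `std::ranges::contains`; the names `HasContainsR` and `BadProjection` are hypothetical, not taken from this patch):

```cpp
#include <algorithm>
#include <functional>
#include <utility>
#include <vector>

// True iff ranges::contains accepts this (range, value, projection) combination.
template <class Range, class ValT, class Proj>
concept HasContainsR = requires(Range&& range, const ValT& val, Proj proj) {
  std::ranges::contains(std::forward<Range>(range), val, proj);
};

// A projection that cannot be invoked on the range's elements.
struct BadProjection {
  void operator()() const;
};

// A valid projection is accepted...
static_assert(HasContainsR<std::vector<int>&, int, std::identity>);
// ...and an invalid one is rejected thanks to the `projected` constraint.
// Dropping that constraint from the overload is what would make this
// assertion (and thus the test) start failing.
static_assert(!HasContainsR<std::vector<int>&, int, BadProjection>);
```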
https://github.com/llvm/llvm-project/pull/65148 From lldb-commits at lists.llvm.org Tue Oct 3 21:25:38 2023 From: lldb-commits at lists.llvm.org (Kazu Hirata via lldb-commits) Date: Tue, 03 Oct 2023 21:25:38 -0700 (PDT) Subject: [Lldb-commits] [lldb] 8641cdf - [lldb] Use std::enable_if_t (NFC) Message-ID: <651ce942.170a0220.894d1.754a@mx.google.com> Author: Kazu Hirata Date: 2023-10-03T21:25:29-07:00 New Revision: 8641cdf397d86f33ac45e4c691ca4f843c359370 URL: https://github.com/llvm/llvm-project/commit/8641cdf397d86f33ac45e4c691ca4f843c359370 DIFF: https://github.com/llvm/llvm-project/commit/8641cdf397d86f33ac45e4c691ca4f843c359370.diff LOG: [lldb] Use std::enable_if_t (NFC) Added: Modified: lldb/include/lldb/Utility/Instrumentation.h Removed: ################################################################################ diff --git a/lldb/include/lldb/Utility/Instrumentation.h b/lldb/include/lldb/Utility/Instrumentation.h index 13ffc0bd39d0b61..4a9ac810eb05e99 100644 --- a/lldb/include/lldb/Utility/Instrumentation.h +++ b/lldb/include/lldb/Utility/Instrumentation.h @@ -21,14 +21,12 @@ namespace lldb_private { namespace instrumentation { -template ::value, int>::type = 0> +template ::value, int> = 0> inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) { ss << t; } -template ::value, - int>::type = 0> +template ::value, int> = 0> inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) { ss << &t; } From lldb-commits at lists.llvm.org Wed Oct 4 01:00:45 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Botond_Istv=C3=A1n_Hprv=C3=A1th?= via lldb-commits) Date: Wed, 04 Oct 2023 01:00:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] Bugfix for choosing the correct deduction guide (PR #66487) In-Reply-To: Message-ID: <651d1bad.170a0220.12e0b.8103@mx.google.com> https://github.com/HoBoIs updated https://github.com/llvm/llvm-project/pull/66487 >From 258462cc65403af147bb47cbeb95210df8e18cd3 Mon Sep 17 00:00:00 2001 From: hobois Date: Fri, 15 Sep 2023 09:28:21 +0200 Subject: [PATCH 1/4] Choose the correct deduction guide If there are two guides, one of them generated from a non-templated constructor and the other from a templated constructor, then the standard gives priority to the first. Clang detected an ambiguity before; now the correct guide is chosen. As an unrelated minor change, fix issue #64020, where a call to isAddressSpaceSupersetOf() specified the two parameters in the wrong order; this could've led to incorrect behavior if further development had inserted code after that call.
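To make the rule concrete, here is a minimal sketch of the intended behavior (it mirrors the shape of the test added below in p2.cpp; the `static_assert` using the Clang builtin `__is_same` is an illustrative addition, not part of the patch):

```cpp
template <class T> struct A {
  A(T, T, int);                    // implicit guide from the non-templated constructor
  template <class U>
  A(int, T, U);                    // implicit guide from the constructor template
};

// Both guides are viable here and both deduce A<int>, and partial ordering
// cannot rank one above the other. The new tiebreaker selects the guide
// generated from the non-templated constructor instead of diagnosing an
// ambiguity.
A x(1, 2, 3);
static_assert(__is_same(decltype(x), A<int>));
```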
--- clang/lib/Sema/SemaOverload.cpp | 17 ++++++++++++++++- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 2 +- .../over.match.class.deduct/p2.cpp | 10 ++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 45a9e5dc98c032d..1bb81238520173a 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -10153,6 +10153,21 @@ bool clang::isBetterOverloadCandidate( // -- F1 is the copy deduction candidate(16.3.1.8) and F2 is not if (Guide1->getDeductionCandidateKind() == DeductionCandidate::Copy) return true; + if (Guide2->getDeductionCandidateKind() == DeductionCandidate::Copy) + return false; + + // --F1 is generated from a non-template constructor and F2 is generated + // from a constructor template + const auto *Constructor1 = Guide1->getCorrespondingConstructor(); + const auto *Constructor2 = Guide2->getCorrespondingConstructor(); + if (Constructor1 && Constructor2) { + bool isC1Templated = Constructor1->getTemplatedKind() != + FunctionDecl::TemplatedKind::TK_NonTemplate; + bool isC2Templated = Constructor2->getTemplatedKind() != + FunctionDecl::TemplatedKind::TK_NonTemplate; + if (isC1Templated != isC2Templated) + return isC2Templated; + } } } @@ -10196,7 +10211,7 @@ bool clang::isBetterOverloadCandidate( if (AS1 != AS2) { if (Qualifiers::isAddressSpaceSupersetOf(AS2, AS1)) return true; - if (Qualifiers::isAddressSpaceSupersetOf(AS2, AS1)) + if (Qualifiers::isAddressSpaceSupersetOf(AS1, AS2)) return false; } } diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 9e5f85b0f9166bd..b9c4a9db842b9ee 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2129,7 +2129,7 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( Function = CXXDeductionGuideDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), InstantiatedExplicitSpecifier, NameInfo, T, TInfo, - D->getSourceRange().getEnd(), /*Ctor=*/nullptr, + D->getSourceRange().getEnd(), DGuide->getCorrespondingConstructor(), DGuide->getDeductionCandidateKind()); Function->setAccess(D->getAccess()); } else { diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp index 4eac0a1ac510f1d..d939d724dc7a0fd 100644 --- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp +++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp @@ -85,3 +85,13 @@ int main() { } + +namespace deduceTemplatedConstructor{ +template struct A { + A(T, T, int); + template + A(int, T, U); +}; + +A x(1, 2, 3); // no-error +} >From 877678b01d05eb301ac49a2a39186a743ca9012d Mon Sep 17 00:00:00 2001 From: hobois Date: Tue, 3 Oct 2023 18:20:11 +0200 Subject: [PATCH 2/4] Added the fix to relasenotes --- clang/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6be824771c583be..84eb3301deb4b37 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -390,6 +390,11 @@ Bug Fixes to C++ Support we now produce a diagnostic. 
Fixes: (`#65522 `_) +- Fixed a bug where clang incorrectly considered implicitly generated deduction + guides from a non-templated constructor and a templated constructor as ambiguous, + rather than prefer the non-templated constructor as specified in + [standard.group]p3 + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed an import failure of recursive friend class template. >From fc425a9be52b9278cd66e123019da2aaa3a0ee9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20Istv=C3=A1n=20Hprv=C3=A1th?= <56926027+HoBoIs at users.noreply.github.com> Date: Tue, 3 Oct 2023 18:26:17 +0200 Subject: [PATCH 3/4] Update ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 84eb3301deb4b37..47984a1b385e492 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -393,7 +393,7 @@ Bug Fixes to C++ Support - Fixed a bug where clang incorrectly considered implicitly generated deduction guides from a non-templated constructor and a templated constructor as ambiguous, rather than prefer the non-templated constructor as specified in - [standard.group]p3 + [standard.group]p3. Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ >From cb032a77662c070cb89ee959ed2f52a5f91ecd52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20Istv=C3=A1n=20Hprv=C3=A1th?= <56926027+HoBoIs at users.noreply.github.com> Date: Tue, 3 Oct 2023 18:43:41 +0200 Subject: [PATCH 4/4] Formatted ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 47984a1b385e492..f79c0fcf231187d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -392,7 +392,7 @@ Bug Fixes to C++ Support - Fixed a bug where clang incorrectly considered implicitly generated deduction guides from a non-templated constructor and a templated constructor as ambiguous, - rather than prefer the non-templated constructor as specified in + rather than prefer the non-templated constructor as specified in [standard.group]p3.
It is also my assumption that the majority of systems will have smefa64 as QEMU has chosen to have. If I turn out to be wrong, we can make the effort to get the tests working without smefa64. --- lldb/packages/Python/lldbsuite/test/lldbtest.py | 6 ++++++ .../aarch64_dynamic_regset/TestArm64DynamicRegsets.py | 4 ++-- .../rw_access_dynamic_resize/TestSVEThreadedDynamic.py | 10 ++++++---- .../rw_access_static_config/TestSVERegisters.py | 5 +++-- .../aarch64_sve_simd_registers/TestSVESIMDRegisters.py | 5 +++-- .../za_dynamic_resize/TestZAThreadedDynamic.py | 6 ++++-- .../aarch64_za_register/za_dynamic_resize/main.c | 1 + .../za_save_restore/TestZARegisterSaveRestore.py | 4 ++-- 8 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index c8670b208ec3f0c..2f4130d3ce68ae0 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1271,6 +1271,12 @@ def isAArch64SVE(self): def isAArch64SME(self): return self.isAArch64() and "sme" in self.getCPUInfo() + def isAArch64SMEFA64(self): + # smefa64 allows the use of the full A64 instruction set in streaming + # mode. This is required by certain test programs to setup register + # state. + return self.isAArch64SME() and "smefa64" in self.getCPUInfo() + def isAArch64MTE(self): return self.isAArch64() and "mte" in self.getCPUInfo() diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py index 2fb8b33126417c2..0ad69c268a9fd29 100644 --- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py +++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py @@ -142,8 +142,8 @@ def make_za_value(self, vl, generator): def test_aarch64_dynamic_regset_config_sme(self): """Test AArch64 Dynamic Register sets configuration, but only SME registers.""" - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present") register_sets = self.setup_register_config_test("sme") diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py index 8bcb76776459d01..b19039f0b5212b4 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -108,8 +108,9 @@ def run_sve_test(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be supported and the " + "smefa64 extension must be present.") self.build_for_mode(mode) @@ -201,8 +202,9 @@ def test_ssve_registers_dynamic_config(self): def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests. 
- if not self.isAArch64SME(): - self.skipTest("Streaming SVE registers must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("Streaming SVE registers must be present and the " + "smefa64 extension must be present.") self.build_for_mode(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index 82b79b8d4b6cc2b..ac99652442b5ddd 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -85,8 +85,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py index 814ca98369fca57..def93c78abc2745 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -41,8 +41,9 @@ def skip_if_needed(self, mode): if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") - if (mode == Mode.SSVE) and not self.isAArch64SME(): - self.skipTest("SSVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): + self.skipTest("SSVE registers must be supported and the smefa64 " + "extension must be present.") def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py index 65d1071c26b2a34..884340b395a448d 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py @@ -65,8 +65,10 @@ def check_disabled_za_register(self, svg): self.expect("register read za", substrs=[self.gen_za_value(svg, lambda r: 0)]) def za_test_impl(self, enable_za): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + # Although the test program doesn't obviously do any operations that + # would need smefa64, calls to libc functions like memset may do. 
+ if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present") self.build() supported_vg = self.get_supported_vg() diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c index fd2590dbe411f7f..05839c26336cc8e 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c @@ -29,6 +29,7 @@ void set_za_register(int svl, int value_offset) { // you have. So setting one that didn't exist would actually set one that did. // That's why we need the streaming vector length here. for (int i = 0; i < svl; ++i) { + // This may involve instructions that require the smefa64 extension. memset(data, i + value_offset, MAX_VL_BYTES); // Each one of these loads a VL sized row of ZA. asm volatile("mov w12, %w0\n\t" diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py index 910966a0b3b0bc5..a647c91f71119ec 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py @@ -106,8 +106,8 @@ def check_za_disabled(self, vl): self.expect("register read za", substrs=[self.make_za_value(vl, lambda row: 0)]) def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl): - if not self.isAArch64SME(): - self.skipTest("SME must be present.") + if not self.isAArch64SMEFA64(): + self.skipTest("SME and the smefa64 extension must be present.") supported_svg = self.get_supported_svg() if len(supported_svg) < 2: >From 4fb5205f9ea5298c7826be8629acaea31fa32837 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 3 Oct 2023 13:24:39 +0100 Subject: [PATCH 2/3] Fix python formatting. --- .../TestSVEThreadedDynamic.py | 12 ++++++++---- .../rw_access_static_config/TestSVERegisters.py | 6 ++++-- .../TestSVESIMDRegisters.py | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py index b19039f0b5212b4..5d5914bef354655 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -109,8 +109,10 @@ def run_sve_test(self, mode): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest( - "Streaming SVE registers must be supported and the " - "smefa64 extension must be present." - ) + self.skipTest( + "Streaming SVE registers must be supported and the " + "smefa64 extension must be present." + ) self.build_for_mode(mode) @@ -203,8 +205,10 @@ def test_ssve_registers_dynamic_config(self): def setup_svg_test(self, mode): # Even when running in SVE mode, we need access to SVG for these tests.
if not self.isAArch64SMEFA64(): + self.skipTest( + "Streaming SVE registers must be present and the " + "smefa64 extension must be present." + ) self.build_for_mode(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index ac99652442b5ddd..f198d4716e8ee18 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -86,8 +86,10 @@ def skip_if_needed(self, mode): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("SSVE registers must be supported and the smefa64 " - "extension must be present.") + self.skipTest( + "SSVE registers must be supported and the smefa64 " + "extension must be present." + ) def sve_registers_configuration_impl(self, mode): self.skip_if_needed(mode) diff --git a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py index def93c78abc2745..ce4c725714d23cc 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -42,8 +42,10 @@ def skip_if_needed(self, mode): self.skipTest("SVE registers must be supported.") if (mode == Mode.SSVE) and not self.isAArch64SMEFA64(): - self.skipTest("SSVE registers must be supported and the smefa64 " - "extension must be present.") + self.skipTest( + "SSVE registers must be supported and the smefa64 " + "extension must be present." + ) def make_simd_value(self, n): pad = " ".join(["0x00"] * 7) >From 050a344b9467c113a1a79e3c6df8f505af2d572d Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 4 Oct 2023 09:00:25 +0100 Subject: [PATCH 3/3] Only read cpuinfo once. --- lldb/packages/Python/lldbsuite/test/lldbtest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index 2f4130d3ce68ae0..e3a56f9853130d3 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1275,7 +1275,8 @@ def isAArch64SMEFA64(self): # smefa64 allows the use of the full A64 instruction set in streaming # mode. This is required by certain test programs to setup register # state. - return self.isAArch64SME() and "smefa64" in self.getCPUInfo() + cpuinfo = self.getCPUInfo() + return self.isAArch64() and "sme" in cpuinfo and "smefa64" in cpuinfo def isAArch64MTE(self): return self.isAArch64() and "mte" in self.getCPUInfo() From lldb-commits at lists.llvm.org Wed Oct 4 01:30:45 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Wed, 04 Oct 2023 01:30:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651d22b5.170a0220.d00a.8398@mx.google.com> antmox wrote: Hi! Could this commit cause the lldb-aarch64-windows bot failure?
https://lab.llvm.org/buildbot/#/builders/219/builds/6086 https://lab.llvm.org/buildbot/#/builders/219/builds/6076 Not sure if it's an unexpected pass or a timeout. Could you please take a look? https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Wed Oct 4 01:43:10 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 01:43:10 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651d259e.630a0220.7f2dd.76f1@mx.google.com> DavidSpickett wrote: @antmox You could remove the expected failure marker and see what the result is then. It will be less confusing. It may be that it does pass, but occasionally times out for another reason. Though that is weird given that all the server parts involved are mocks. https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Wed Oct 4 01:52:26 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Botond_Istv=C3=A1n_Hprv=C3=A1th?= via lldb-commits) Date: Wed, 04 Oct 2023 01:52:26 -0700 (PDT) Subject: [Lldb-commits] [lldb] Bugfix for choosing the correct deduction guide (PR #66487) In-Reply-To: Message-ID: <651d27ca.170a0220.8ff92.7f68@mx.google.com> https://github.com/HoBoIs updated https://github.com/llvm/llvm-project/pull/66487 >From 258462cc65403af147bb47cbeb95210df8e18cd3 Mon Sep 17 00:00:00 2001 From: hobois Date: Fri, 15 Sep 2023 09:28:21 +0200 Subject: [PATCH 1/5] Choose the correct deduction guide If there are two guides, one of them generated from a non-templated constructor and the other from a templated constructor, then the standard gives priority to the first. Clang detected an ambiguity before; now the correct guide is chosen. As an unrelated minor change, fix issue #64020, where a call to isAddressSpaceSupersetOf() specified the two parameters in the wrong order; this could've led to incorrect behavior if further development had inserted code after that call.
--- clang/lib/Sema/SemaOverload.cpp | 17 ++++++++++++++++- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 2 +- .../over.match.class.deduct/p2.cpp | 10 ++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 45a9e5dc98c032d..1bb81238520173a 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -10153,6 +10153,21 @@ bool clang::isBetterOverloadCandidate( // -- F1 is the copy deduction candidate(16.3.1.8) and F2 is not if (Guide1->getDeductionCandidateKind() == DeductionCandidate::Copy) return true; + if (Guide2->getDeductionCandidateKind() == DeductionCandidate::Copy) + return false; + + // --F1 is generated from a non-template constructor and F2 is generated + // from a constructor template + const auto *Constructor1 = Guide1->getCorrespondingConstructor(); + const auto *Constructor2 = Guide2->getCorrespondingConstructor(); + if (Constructor1 && Constructor2) { + bool isC1Templated = Constructor1->getTemplatedKind() != + FunctionDecl::TemplatedKind::TK_NonTemplate; + bool isC2Templated = Constructor2->getTemplatedKind() != + FunctionDecl::TemplatedKind::TK_NonTemplate; + if (isC1Templated != isC2Templated) + return isC2Templated; + } } } @@ -10196,7 +10211,7 @@ bool clang::isBetterOverloadCandidate( if (AS1 != AS2) { if (Qualifiers::isAddressSpaceSupersetOf(AS2, AS1)) return true; - if (Qualifiers::isAddressSpaceSupersetOf(AS2, AS1)) + if (Qualifiers::isAddressSpaceSupersetOf(AS1, AS2)) return false; } } diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 9e5f85b0f9166bd..b9c4a9db842b9ee 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2129,7 +2129,7 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( Function = CXXDeductionGuideDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), InstantiatedExplicitSpecifier, NameInfo, T, TInfo, - D->getSourceRange().getEnd(), /*Ctor=*/nullptr, + D->getSourceRange().getEnd(), DGuide->getCorrespondingConstructor(), DGuide->getDeductionCandidateKind()); Function->setAccess(D->getAccess()); } else { diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp index 4eac0a1ac510f1d..d939d724dc7a0fd 100644 --- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp +++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp @@ -85,3 +85,13 @@ int main() { } + +namespace deduceTemplatedConstructor{ +template struct A { + A(T, T, int); + template + A(int, T, U); +}; + +A x(1, 2, 3); // no-error +} >From 877678b01d05eb301ac49a2a39186a743ca9012d Mon Sep 17 00:00:00 2001 From: hobois Date: Tue, 3 Oct 2023 18:20:11 +0200 Subject: [PATCH 2/5] Added the fix to relasenotes --- clang/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6be824771c583be..84eb3301deb4b37 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -390,6 +390,11 @@ Bug Fixes to C++ Support we now produce a diagnostic. 
Fixes: (`#65522 `_) +- Fixed a bug where clang incorrectly considered implicitly generated deduction + guides from a non-templated constructor and a templated constructor as ambiguous, + rather than prefer the non-templated constructor as specified in + [standard.group]p3 + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed an import failure of recursive friend class template. >From fc425a9be52b9278cd66e123019da2aaa3a0ee9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20Istv=C3=A1n=20Hprv=C3=A1th?= <56926027+HoBoIs at users.noreply.github.com> Date: Tue, 3 Oct 2023 18:26:17 +0200 Subject: [PATCH 3/5] Update ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 84eb3301deb4b37..47984a1b385e492 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -393,7 +393,7 @@ Bug Fixes to C++ Support - Fixed a bug where clang incorrectly considered implicitly generated deduction guides from a non-templated constructor and a templated constructor as ambiguous, rather than prefer the non-templated constructor as specified in - [standard.group]p3 + [standard.group]p3. Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ >From cb032a77662c070cb89ee959ed2f52a5f91ecd52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20Istv=C3=A1n=20Hprv=C3=A1th?= <56926027+HoBoIs at users.noreply.github.com> Date: Tue, 3 Oct 2023 18:43:41 +0200 Subject: [PATCH 4/5] Formated ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 47984a1b385e492..f79c0fcf231187d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -392,7 +392,7 @@ Bug Fixes to C++ Support - Fixed a bug where clang incorrectly considered implicitly generated deduction guides from a non-templated constructor and a templated constructor as ambiguous, - rather than prefer the non-templated constructor as specified in + rather than prefer the non-templated constructor as specified in [standard.group]p3. 
Bug Fixes to AST Handling >From 289c269d9c42fc43062fcb3fb950deb0a6cb11e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20Istv=C3=A1n=20Hprv=C3=A1th?= <56926027+HoBoIs at users.noreply.github.com> Date: Wed, 4 Oct 2023 10:52:05 +0200 Subject: [PATCH 5/5] Added tests to p2.cpp --- .../over.match.class.deduct/p2.cpp | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp index d939d724dc7a0fd..57fef7d4cc7da61 100644 --- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp +++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.class.deduct/p2.cpp @@ -87,11 +87,36 @@ int main() { } namespace deduceTemplatedConstructor{ +template struct IsSame { + static constexpr bool value = false; +}; + +template struct IsSame { + static constexpr bool value = true; +}; template struct A { + using value_type = T; + A(value_type); + A(const A&); A(T, T, int); template - A(int, T, U); + A(int, T, U); }; A x(1, 2, 3); // no-error +static_assert(IsSame>::value); + +template +A(T) -> A; + +A a(42); +static_assert(IsSame>::value); +A b = a; +static_assert(IsSame>::value); + +template +A(A) -> A>; + +A b2 = a; +static_assert(IsSame>>::value); } From lldb-commits at lists.llvm.org Wed Oct 4 02:34:35 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Wed, 04 Oct 2023 02:34:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (PR #68193) Message-ID: https://github.com/antmox created https://github.com/llvm/llvm-project/pull/68193 Looks like this test pass since #68050. >From 24ae4ebf70c2b6884b31ca99a72eb0ba16bddf33 Mon Sep 17 00:00:00 2001 From: Antoine Moynault Date: Wed, 4 Oct 2023 09:27:46 +0000 Subject: [PATCH] [lldb][test] Remove expected failure marker for TestPlatformAttach on windows Looks like this test pass since #68050. --- .../API/functionalities/gdb_remote_client/TestPlatformAttach.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py index d62e86b2a3c1d20..2aaf962b9e4f9a5 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -7,7 +7,6 @@ class TestPlatformAttach(GDBRemoteTestBase): @skipIfRemote - @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr52451") def test_attach(self): """Test attaching by name""" From lldb-commits at lists.llvm.org Wed Oct 4 02:35:48 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 02:35:48 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (PR #68193) In-Reply-To: Message-ID: <651d31f4.a70a0220.b3dcf.74ba@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes Looks like this test passes since #68050. --- Full diff: https://github.com/llvm/llvm-project/pull/68193.diff 1 Files Affected: - (modified) lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py (-1) ``````````diff diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py index d62e86b2a3c1d20..2aaf962b9e4f9a5 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -7,7 +7,6 @@ class TestPlatformAttach(GDBRemoteTestBase): @skipIfRemote - @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr52451") def test_attach(self): """Test attaching by name""" ``````````
https://github.com/llvm/llvm-project/pull/68193 From lldb-commits at lists.llvm.org Wed Oct 4 02:42:10 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 02:42:10 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (PR #68193) In-Reply-To: Message-ID: <651d3372.170a0220.3a410.8348@mx.google.com> https://github.com/DavidSpickett approved this pull request. Might as well go ahead and try this. If it's still unstable by end of your work day, add a skipIfWindows and let Jonas look into it. https://github.com/llvm/llvm-project/pull/68193 From lldb-commits at lists.llvm.org Wed Oct 4 03:12:16 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Wed, 04 Oct 2023 03:12:16 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (PR #68193) In-Reply-To: Message-ID: <651d3a80.170a0220.99b42.84fb@mx.google.com> https://github.com/antmox edited https://github.com/llvm/llvm-project/pull/68193 From lldb-commits at lists.llvm.org Wed Oct 4 03:13:36 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 03:13:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] 75f295c - [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (#68193) Message-ID: <651d3ad0.630a0220.841a0.651c@mx.google.com> Author: antoine moynault Date: 2023-10-04T12:13:31+02:00 New Revision: 75f295c2455131993f9c945320736b84c5fcf99c URL: https://github.com/llvm/llvm-project/commit/75f295c2455131993f9c945320736b84c5fcf99c DIFF: https://github.com/llvm/llvm-project/commit/75f295c2455131993f9c945320736b84c5fcf99c.diff LOG: [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (#68193) Looks like this test passes since #68050. 
Added: Modified: lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py Removed: ################################################################################ diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py index d62e86b2a3c1d20..2aaf962b9e4f9a5 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -7,7 +7,6 @@ class TestPlatformAttach(GDBRemoteTestBase): @skipIfRemote - @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr52451") def test_attach(self): """Test attaching by name""" From lldb-commits at lists.llvm.org Wed Oct 4 03:13:37 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Wed, 04 Oct 2023 03:13:37 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][test] Remove expected failure marker for TestPlatformAttach on windows (PR #68193) In-Reply-To: Message-ID: <651d3ad1.a70a0220.9dfab.7387@mx.google.com> https://github.com/antmox closed https://github.com/llvm/llvm-project/pull/68193 From lldb-commits at lists.llvm.org Wed Oct 4 03:47:23 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 03:47:23 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651d42bb.170a0220.f804b.24aa@mx.google.com> DavidSpickett wrote: FYI on our Windows on Arm (aka AArch64) bot we got this warning: ``` [4840/6117] Building CXX object tools\lldb\source\Plugins\DynamicLoader\FreeBSD-Kernel\CMakeFiles\lldbPluginDynamicLoaderFreeBSDKernel.dir\DynamicLoaderFreeBSDKernel.cpp.obj C:\Users\tcwg\llvm-worker\lldb-aarch64-windows\llvm-project\lldb\source\Plugins\DynamicLoader\FreeBSD-Kernel\DynamicLoaderFreeBSDKernel.cpp(540,7): warning: format specifies type 'unsigned long' but the argument has type 'std::vector::size_type' (aka 'unsigned long long') [-Wformat] linker_files_list.size()); ^~~~~~~~~~~~~~~~~~~~~~~~ C:\Users\tcwg\llvm-worker\lldb-aarch64-windows\llvm-project\lldb\include\lldb/Utility/Log.h(353,48): note: expanded from macro 'LLDB_LOGF' log_private->Formatf(__FILE__, __func__, __VA_ARGS__); \ ^~~~~~~~~~~ 1 warning generated. ``` And this one on the 32 bit Arm Linux bot: ``` ../llvm-project/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp:540:7: warning: format specifies type 'unsigned long' but the argument has type 'size_type' (aka 'unsigned int') [-Wformat] 1 warning generated. ``` AArch64 Linux was fine. So I assume each one is using one of `unsigned int`, `unsigned long` or `unsigned long long` for its size types. There is probably a way to print that in a portable way but I don't recall it at the moment. https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Wed Oct 4 04:20:50 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 04:20:50 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651d4a92.170a0220.3896d.88d3@mx.google.com> aokblast wrote: I think the answer is %z from cpp reference as the following screenshot shows. I will check the whole code to prevent issues like this. 
![CleanShot 2023-10-04 at 19 17 55](https://github.com/llvm/llvm-project/assets/16476727/8ffdc723-18c5-410a-84d9-40bf0620dd6f) https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Wed Oct 4 04:34:41 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Martin_Storsj=C3=B6?= via lldb-commits) Date: Wed, 04 Oct 2023 04:34:41 -0700 (PDT) Subject: [Lldb-commits] [lldb] d918b81 - [lldb] [debugserver] Add spaces between sentences in a CMake warning. NFC. Message-ID: <651d4dd1.630a0220.7af46.7d42@mx.google.com> Author: Martin Storsjö Date: 2023-10-04T14:34:12+03:00 New Revision: d918b813c852fb4632875c683f4b9552eddea30d URL: https://github.com/llvm/llvm-project/commit/d918b813c852fb4632875c683f4b9552eddea30d DIFF: https://github.com/llvm/llvm-project/commit/d918b813c852fb4632875c683f4b9552eddea30d.diff LOG: [lldb] [debugserver] Add spaces between sentences in a CMake warning. NFC. Added: Modified: lldb/tools/debugserver/source/CMakeLists.txt Removed: ################################################################################ diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index 43accc363ef3cb9..f0b9756becab6e6 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -19,8 +19,8 @@ endfunction() function(get_debugserver_codesign_identity result) string(CONCAT not_found_help - "This will cause failures in the test suite." - "Pass '-DLLDB_USE_SYSTEM_DEBUGSERVER=ON' to use the system one instead." + "This will cause failures in the test suite. " + "Pass '-DLLDB_USE_SYSTEM_DEBUGSERVER=ON' to use the system one instead. " "See 'Code Signing on macOS' in the documentation." ) From lldb-commits at lists.llvm.org Wed Oct 4 04:52:00 2023 From: lldb-commits at lists.llvm.org (Ed Maste via lldb-commits) Date: Wed, 04 Oct 2023 04:52:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651d51e0.a70a0220.17158.7b0e@mx.google.com> emaste wrote: I'm not sure why @aokblast's reply didn't appear here, but indeed `%zu` is the right format specifier. 
https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Wed Oct 4 04:56:50 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 04:56:50 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] fix i386 size_t error when using LLDB_LOGF (PR #68210) Message-ID: https://github.com/aokblast created https://github.com/llvm/llvm-project/pull/68210 None >From 9df1a71c17219216bc34e9e750a26c5a874abc17 Mon Sep 17 00:00:00 2001 From: SHENG-YI HONG Date: Wed, 4 Oct 2023 19:55:50 +0800 Subject: [PATCH] fix i386 size_t error when printing value --- .../DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp index bbb83ff0a118400..a1bf8efb064b614 100644 --- a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp @@ -536,7 +536,7 @@ bool DynamicLoaderFreeBSDKernel::ParseKmods(Address linker_files_head_addr) { return false; LLDB_LOGF( log, - "Kmod-changed breakpoint hit, there are %lu kernel modules currently.\n", + "Kmod-changed breakpoint hit, there are %zu kernel modules currently.\n", linker_files_list.size()); ModuleList &modules = m_process->GetTarget().GetImages(); From lldb-commits at lists.llvm.org Wed Oct 4 04:58:17 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 04:58:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] Add dynamic loader handle class for FreeBSD Kernel (PR #67106) In-Reply-To: Message-ID: <651d5359.050a0220.953f0.7d11@mx.google.com> aokblast wrote: I send another PR in [68210](https://github.com/llvm/llvm-project/pull/68210). I don't know if it is the correct way to fix this problem. https://github.com/llvm/llvm-project/pull/67106 From lldb-commits at lists.llvm.org Wed Oct 4 05:07:06 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 05:07:06 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] fix i386 size_t error when using LLDB_LOGF (PR #68210) In-Reply-To: Message-ID: <651d556a.170a0220.ab3a7.896d@mx.google.com> https://github.com/DavidSpickett approved this pull request. `%zu` is used a lot elsewhere for logs and prints, none of which I've seen warn across Linaro's bots. So this LGTM, thanks for the fix. 
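[For context, a minimal standalone sketch of the portability point discussed above — this is an illustration, not code from the patch; `linker_files_list` is reused here only as a familiar name:]

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> linker_files_list{1, 2, 3};

  // Non-portable: std::vector<T>::size_type is size_t, which is unsigned int
  // on 32-bit Arm Linux, unsigned long on AArch64 Linux, and unsigned long
  // long on 64-bit Windows, so a fixed specifier like %lu warns somewhere.
  // std::printf("there are %lu kernel modules currently.\n",
  //             linker_files_list.size());

  // Portable: the z length modifier is defined to match size_t everywhere.
  std::printf("there are %zu kernel modules currently.\n",
              linker_files_list.size());

  // Also portable: cast explicitly if a fixed-width specifier is required.
  std::printf("there are %llu kernel modules currently.\n",
              static_cast<unsigned long long>(linker_files_list.size()));
}
```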
https://github.com/llvm/llvm-project/pull/68210 From lldb-commits at lists.llvm.org Wed Oct 4 05:12:32 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 05:12:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] e00f227 - [lldb][FreeBSD] fix i386 size_t error when using LLDB_LOGF (#68210) Message-ID: <651d56b0.170a0220.cab38.919f@mx.google.com> Author: aokblast Date: 2023-10-04T08:12:27-04:00 New Revision: e00f2272d640ad5e8eda8982cd616d3ae56036b7 URL: https://github.com/llvm/llvm-project/commit/e00f2272d640ad5e8eda8982cd616d3ae56036b7 DIFF: https://github.com/llvm/llvm-project/commit/e00f2272d640ad5e8eda8982cd616d3ae56036b7.diff LOG: [lldb][FreeBSD] fix i386 size_t error when using LLDB_LOGF (#68210) Added: Modified: lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp index bbb83ff0a118400..a1bf8efb064b614 100644 --- a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp @@ -536,7 +536,7 @@ bool DynamicLoaderFreeBSDKernel::ParseKmods(Address linker_files_head_addr) { return false; LLDB_LOGF( log, - "Kmod-changed breakpoint hit, there are %lu kernel modules currently.\n", + "Kmod-changed breakpoint hit, there are %zu kernel modules currently.\n", linker_files_list.size()); ModuleList &modules = m_process->GetTarget().GetImages(); From lldb-commits at lists.llvm.org Wed Oct 4 05:12:33 2023 From: lldb-commits at lists.llvm.org (Ed Maste via lldb-commits) Date: Wed, 04 Oct 2023 05:12:33 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] fix i386 size_t error when using LLDB_LOGF (PR #68210) In-Reply-To: Message-ID: <651d56b1.170a0220.7b48a.8b96@mx.google.com> https://github.com/emaste closed https://github.com/llvm/llvm-project/pull/68210 From lldb-commits at lists.llvm.org Wed Oct 4 05:47:20 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 05:47:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] ceec9a7 - [lldb][test] Skip platform attach test on Windows Message-ID: <651d5ed8.630a0220.19996.875e@mx.google.com> Author: David Spickett Date: 2023-10-04T12:46:35Z New Revision: ceec9a7874af2eea8b00b5616fad388ccfa2b4f3 URL: https://github.com/llvm/llvm-project/commit/ceec9a7874af2eea8b00b5616fad388ccfa2b4f3 DIFF: https://github.com/llvm/llvm-project/commit/ceec9a7874af2eea8b00b5616fad388ccfa2b4f3.diff LOG: [lldb][test] Skip platform attach test on Windows This can pass but every so often times out: https://lab.llvm.org/buildbot/#/builders/219/builds/6092 Added: Modified: lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py Removed: ################################################################################ diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py index 2aaf962b9e4f9a5..3aeb87874bdfa87 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformAttach.py @@ -7,6 +7,7 @@ class TestPlatformAttach(GDBRemoteTestBase): @skipIfRemote + @skipIfWindows def test_attach(self): """Test attaching by name""" From 
lldb-commits at lists.llvm.org Wed Oct 4 05:48:14 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 05:48:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651d5f0e.170a0220.17d57.34dd@mx.google.com> DavidSpickett wrote: I've skipped it entirely in https://github.com/llvm/llvm-project/commit/ceec9a7874af2eea8b00b5616fad388ccfa2b4f3. https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Wed Oct 4 05:55:35 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Wed, 04 Oct 2023 05:55:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][FreeBSD] fix i386 size_t error when using LLDB_LOGF (PR #68210) In-Reply-To: Message-ID: <651d60c7.050a0220.a773e.82d0@mx.google.com> DavidSpickett wrote: No warning present in https://lab.llvm.org/buildbot/#/builders/17/builds/44083 or https://lab.llvm.org/buildbot/#/builders/219/builds/6093. So this worked. https://github.com/llvm/llvm-project/pull/68210 From lldb-commits at lists.llvm.org Wed Oct 4 06:05:41 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Wed, 04 Oct 2023 06:05:41 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050) In-Reply-To: Message-ID: <651d6325.170a0220.43a32.9455@mx.google.com> antmox wrote: Yes, thanks @DavidSpickett https://github.com/llvm/llvm-project/pull/68050 From lldb-commits at lists.llvm.org Wed Oct 4 07:16:24 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Botond_Istv=C3=A1n_Hprv=C3=A1th?= via lldb-commits) Date: Wed, 04 Oct 2023 07:16:24 -0700 (PDT) Subject: [Lldb-commits] [lldb] Bugfix for chosing the correct deduction guide (PR #66487) In-Reply-To: Message-ID: <651d73b8.620a0220.16df5.860e@mx.google.com> HoBoIs wrote: @erichkeane @shafik I don't have write access. Could you merge if there is nothing to be done? https://github.com/llvm/llvm-project/pull/66487 From lldb-commits at lists.llvm.org Wed Oct 4 07:23:36 2023 From: lldb-commits at lists.llvm.org (Samira Bazuzi via lldb-commits) Date: Wed, 04 Oct 2023 07:23:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Mark operator== const to avoid ambiguity in C++20. (PR #68224) Message-ID: https://github.com/bazuzi created https://github.com/llvm/llvm-project/pull/68224 C++20 will automatically generate an operator== with reversed operand order, which is ambiguous with the written operator== when one argument is marked const and the other isn't. These operators currently trigger -Wambiguous-reversed-operator at usage sites lldb/source/Symbol/SymbolFileOnDemand.cpp:68 and lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp:1286. >From f87e05395e499069178660a2b614ae3ac7f887c2 Mon Sep 17 00:00:00 2001 From: Samira Bazuzi Date: Wed, 4 Oct 2023 10:06:14 -0400 Subject: [PATCH] [lldb] Mark operator== const to avoid ambiguity in C++20. C++20 will automatically generate an operator== with reversed operand order, which is ambiguous with the written operator== when one argument is marked const and the other isn't. These operators currently trigger -Wambiguous-reversed-operator at usage sites lldb/source/Symbol/SymbolFileOnDemand.cpp:68 and lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp:1286. 
--- lldb/include/lldb/Utility/XcodeSDK.h | 2 +- .../DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp | 4 ++-- .../DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h | 2 +- lldb/source/Utility/XcodeSDK.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lldb/include/lldb/Utility/XcodeSDK.h b/lldb/include/lldb/Utility/XcodeSDK.h index 878b131a1814536..f8528995d549c9c 100644 --- a/lldb/include/lldb/Utility/XcodeSDK.h +++ b/lldb/include/lldb/Utility/XcodeSDK.h @@ -69,7 +69,7 @@ class XcodeSDK { XcodeSDK &operator=(const XcodeSDK &other); XcodeSDK(const XcodeSDK&) = default; - bool operator==(const XcodeSDK &other); + bool operator==(const XcodeSDK &other) const; /// Return parsed SDK type and version number. Info Parse() const; diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp index 378b2472278605d..5aeaf3ae24d7c7b 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp @@ -606,8 +606,8 @@ void DynamicLoaderDarwinKernel::KextImageInfo::SetProcessStopId( m_load_process_stop_id = stop_id; } -bool DynamicLoaderDarwinKernel::KextImageInfo:: -operator==(const KextImageInfo &rhs) { +bool DynamicLoaderDarwinKernel::KextImageInfo::operator==( + const KextImageInfo &rhs) const { if (m_uuid.IsValid() || rhs.GetUUID().IsValid()) { return m_uuid == rhs.GetUUID(); } diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h index 38a60d154820a96..000c382b2c01117 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h @@ -176,7 +176,7 @@ class DynamicLoaderDarwinKernel : public lldb_private::DynamicLoader { void SetProcessStopId(uint32_t stop_id); - bool operator==(const KextImageInfo &rhs); + bool operator==(const KextImageInfo &rhs) const; uint32_t GetAddressByteSize(); // as determined by Mach-O header diff --git a/lldb/source/Utility/XcodeSDK.cpp b/lldb/source/Utility/XcodeSDK.cpp index 84f3ccbd01e2d07..154ddbebe8b30d5 100644 --- a/lldb/source/Utility/XcodeSDK.cpp +++ b/lldb/source/Utility/XcodeSDK.cpp @@ -56,7 +56,7 @@ XcodeSDK::XcodeSDK(XcodeSDK::Info info) : m_name(GetName(info.type).str()) { XcodeSDK &XcodeSDK::operator=(const XcodeSDK &other) = default; -bool XcodeSDK::operator==(const XcodeSDK &other) { +bool XcodeSDK::operator==(const XcodeSDK &other) const { return m_name == other.m_name; } From lldb-commits at lists.llvm.org Wed Oct 4 07:24:49 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 07:24:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Mark operator== const to avoid ambiguity in C++20. (PR #68224) In-Reply-To: Message-ID: <651d75b1.170a0220.99b42.9c00@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes C++20 will automatically generate an operator== with reversed operand order, which is ambiguous with the written operator== when one argument is marked const and the other isn't. These operators currently trigger -Wambiguous-reversed-operator at usage sites lldb/source/Symbol/SymbolFileOnDemand.cpp:68 and lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp:1286. --- Full diff: https://github.com/llvm/llvm-project/pull/68224.diff 4 Files Affected: - (modified) lldb/include/lldb/Utility/XcodeSDK.h (+1-1) - (modified) lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp (+2-2) - (modified) lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h (+1-1) - (modified) lldb/source/Utility/XcodeSDK.cpp (+1-1) ``````````diff diff --git a/lldb/include/lldb/Utility/XcodeSDK.h b/lldb/include/lldb/Utility/XcodeSDK.h index 878b131a1814536..f8528995d549c9c 100644 --- a/lldb/include/lldb/Utility/XcodeSDK.h +++ b/lldb/include/lldb/Utility/XcodeSDK.h @@ -69,7 +69,7 @@ class XcodeSDK { XcodeSDK &operator=(const XcodeSDK &other); XcodeSDK(const XcodeSDK&) = default; - bool operator==(const XcodeSDK &other); + bool operator==(const XcodeSDK &other) const; /// Return parsed SDK type and version number. Info Parse() const; diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp index 378b2472278605d..5aeaf3ae24d7c7b 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp @@ -606,8 +606,8 @@ void DynamicLoaderDarwinKernel::KextImageInfo::SetProcessStopId( m_load_process_stop_id = stop_id; } -bool DynamicLoaderDarwinKernel::KextImageInfo:: -operator==(const KextImageInfo &rhs) { +bool DynamicLoaderDarwinKernel::KextImageInfo::operator==( + const KextImageInfo &rhs) const { if (m_uuid.IsValid() || rhs.GetUUID().IsValid()) { return m_uuid == rhs.GetUUID(); } diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h index 38a60d154820a96..000c382b2c01117 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h @@ -176,7 +176,7 @@ class DynamicLoaderDarwinKernel : public lldb_private::DynamicLoader { void SetProcessStopId(uint32_t stop_id); - bool operator==(const KextImageInfo &rhs); + bool operator==(const KextImageInfo &rhs) const; uint32_t GetAddressByteSize(); // as determined by Mach-O header diff --git a/lldb/source/Utility/XcodeSDK.cpp b/lldb/source/Utility/XcodeSDK.cpp index 84f3ccbd01e2d07..154ddbebe8b30d5 100644 --- a/lldb/source/Utility/XcodeSDK.cpp +++ b/lldb/source/Utility/XcodeSDK.cpp @@ -56,7 +56,7 @@ XcodeSDK::XcodeSDK(XcodeSDK::Info info) : m_name(GetName(info.type).str()) { XcodeSDK &XcodeSDK::operator=(const XcodeSDK &other) = default; -bool XcodeSDK::operator==(const XcodeSDK &other) { +bool XcodeSDK::operator==(const XcodeSDK &other) const { return m_name == other.m_name; } ``````````
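[To make the ambiguity concrete, here is a self-contained sketch; the `SDK` type is a hypothetical stand-in for illustration, not the real `XcodeSDK` or `KextImageInfo` classes:]

```cpp
// Build with: clang++ -std=c++20 -Wall -c sketch.cpp
struct SDK {
  int id;
  // Declared without const, like the lldb operators before this patch.
  bool operator==(const SDK &other) { return id == other.id; }
};

bool same(SDK &a, SDK &b) {
  // C++20 also forms the reversed candidate b.operator==(a). Because the
  // implicit object parameter is non-const while the explicit parameter is
  // const SDK&, each candidate is better for one argument and worse for the
  // other, so the call is formally ambiguous. Clang resolves it as an
  // extension and emits -Wambiguous-reversed-operator.
  return a == b;
}
```

[Marking the member `operator==` const makes the normal and reversed candidates' conversion sequences identical, and the tie then breaks in favor of the non-rewritten candidate — which is why adding `const` is enough to silence the warning.]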
https://github.com/llvm/llvm-project/pull/68224 From lldb-commits at lists.llvm.org Wed Oct 4 07:27:08 2023 From: lldb-commits at lists.llvm.org (Samira Bazuzi via lldb-commits) Date: Wed, 04 Oct 2023 07:27:08 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Mark operator== const to avoid ambiguity in C++20. (PR #68224) In-Reply-To: Message-ID: <651d763c.170a0220.4b4f0.9a3c@mx.google.com> bazuzi wrote: @JDevlieghere @labath Are either of you able to review? https://github.com/llvm/llvm-project/pull/68224 From lldb-commits at lists.llvm.org Wed Oct 4 07:35:34 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Wed, 04 Oct 2023 07:35:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Mark operator== const to avoid ambiguity in C++20. (PR #68224) In-Reply-To: Message-ID: <651d7836.170a0220.c21bf.9c5c@mx.google.com> https://github.com/JDevlieghere approved this pull request. https://github.com/llvm/llvm-project/pull/68224 From lldb-commits at lists.llvm.org Wed Oct 4 08:06:00 2023 From: lldb-commits at lists.llvm.org (Samira Bazuzi via lldb-commits) Date: Wed, 04 Oct 2023 08:06:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Mark operator== const to avoid ambiguity in C++20. (PR #68224) In-Reply-To: Message-ID: <651d7f58.a70a0220.507c0.8a08@mx.google.com> bazuzi wrote: Thanks, Jonas. Can you also merge for me? I don't have write access. https://github.com/llvm/llvm-project/pull/68224 From lldb-commits at lists.llvm.org Wed Oct 4 08:15:33 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 08:15:33 -0700 (PDT) Subject: [Lldb-commits] [lldb] Bugfix for chosing the correct deduction guide (PR #66487) Message-ID: <651d8195.170a0220.4c615.9e36@mx.google.com> https://github.com/whisperity requested changes to this pull request. https://github.com/llvm/llvm-project/pull/66487 From lldb-commits at lists.llvm.org Wed Oct 4 08:31:06 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Botond_Istv=C3=A1n_Hprv=C3=A1th?= via lldb-commits) Date: Wed, 04 Oct 2023 08:31:06 -0700 (PDT) Subject: [Lldb-commits] [lldb] Bugfix for chosing the correct deduction guide (PR #66487) In-Reply-To: Message-ID: <651d853a.170a0220.4456b.a36c@mx.google.com> ================ @@ -85,3 +85,38 @@ int main() { } + +namespace deduceTemplatedConstructor{ ---------------- HoBoIs wrote: Done https://github.com/llvm/llvm-project/pull/66487 From lldb-commits at lists.llvm.org Wed Oct 4 08:34:45 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Wed, 04 Oct 2023 08:34:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651d8615.170a0220.58ae1.3dba@mx.google.com> https://github.com/yinying-lisa-li updated https://github.com/llvm/llvm-project/pull/68130 >From 47b34bb327e1078678d3ba0c96ebce3fc89cf2ae Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 16:43:50 +0000 Subject: [PATCH 1/4] [mlir][sparse] Print new syntax Printing changes from #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> to map = (d0) -> (d0 : compressed). Level properties, ELL and slice are also supported. 
--- .../mlir/Dialect/SparseTensor/IR/Enums.h | 20 +-- .../SparseTensor/IR/SparseTensorDialect.cpp | 64 ++++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 +- .../SparseTensor/roundtrip_encoding.mlir | 32 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 8 +- .../SparseTensor/sparse_tensor_reshape.mlir | 2 +- .../python/dialects/sparse_tensor/dialect.py | 160 +++++++++--------- 7 files changed, 159 insertions(+), 135 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index bc351ec52c0946b..2920ef79f461c6a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -215,29 +215,29 @@ constexpr const char *toMLIRString(DimLevelType dlt) { case DimLevelType::Compressed: return "compressed"; case DimLevelType::CompressedNu: - return "compressed_nu"; + return "compressed(nonunique)"; case DimLevelType::CompressedNo: - return "compressed_no"; + return "compressed(nonordered)"; case DimLevelType::CompressedNuNo: - return "compressed_nu_no"; + return "compressed(nonunique, nonordered)"; case DimLevelType::Singleton: return "singleton"; case DimLevelType::SingletonNu: - return "singleton_nu"; + return "singleton(nonunique)"; case DimLevelType::SingletonNo: - return "singleton_no"; + return "singleton(nonordered)"; case DimLevelType::SingletonNuNo: - return "singleton_nu_no"; + return "singleton(nonunique, nonordered)"; case DimLevelType::LooseCompressed: return "loose_compressed"; case DimLevelType::LooseCompressedNu: - return "loose_compressed_nu"; + return "loose_compressed(nonunique)"; case DimLevelType::LooseCompressedNo: - return "loose_compressed_no"; + return "loose_compressed(nonordered)"; case DimLevelType::LooseCompressedNuNo: - return "loose_compressed_nu_no"; + return "loose_compressed(nonunique, nonordered)"; case DimLevelType::TwoOutOfFour: - return "compressed24"; + return "block2_4"; } return ""; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 3897e1b9ea3597c..4c8dccdda6c0c7c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -586,30 +586,56 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. - printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // Empty affine map indicates identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. + printer << "<{ map = "; + // Symbolic identifiers. + if (map.getNumSymbols() != 0) { + printer << '['; + for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) + printer << 's' << i << ", "; + if (map.getNumSymbols() >= 1) + printer << 's' << map.getNumSymbols() - 1; + printer << ']'; + } + // Dimension identifiers. 
+ printer << '('; + auto dimSlices = getDimSlices(); + if (!dimSlices.empty()) { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << " : " << dimSlices[i] << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1 << " : " + << dimSlices[map.getNumDims() - 1]; + } else { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1; + } + printer << ')'; + // Level format and properties. + printer << " -> ("; + for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { + map.getResult(i).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; + } + if (map.getNumResults() >= 1) { + auto lastIndex = map.getNumResults() - 1; + map.getResult(lastIndex).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); + } + printer << ')'; // Print remaining members only for non-default values. - if (!isIdentity()) - printer << ", dimToLvl = affine_map<" << getDimToLvl() << ">"; if (getPosWidth()) printer << ", posWidth = " << getPosWidth(); if (getCrdWidth()) printer << ", crdWidth = " << getCrdWidth(); - if (!getDimSlices().empty()) { - printer << ", dimSlices = [ "; - llvm::interleaveComma(getDimSlices(), printer, - [&](SparseTensorDimSliceAttr attr) { - // Calls SparseTensorDimSliceAttr::print directly to - // skip mnemonic. - attr.print(printer); - }); - printer << " ]"; - } - printer << " }>"; } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 69a9c274a861ce1..c3b16807a7c18a6 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private 
@"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index 39e3ef102423524..c4ef50bee01ea2c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // CHECK-LABEL: func private @sparse_1d_tensor( -// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }>>) +// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) // ----- @@ -13,7 +13,7 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map }> // CHECK-LABEL: func private @sparse_csr( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>) func.func private @sparse_csr(tensor) // ----- @@ -23,7 +23,7 @@ func.func private @sparse_csr(tensor) }> // CHECK-LABEL: func private @CSR_explicit( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : compressed) }>> func.func private @CSR_explicit(%arg0: tensor) { return } @@ -37,7 +37,7 @@ func.func private @CSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @sparse_csc( -// CHECK-SAME: tensor (d1, d0)> }>>) +// CHECK-SAME: tensor (d1 : dense, d0 : compressed) }>>) func.func private @sparse_csc(tensor) // ----- @@ -49,7 +49,7 @@ func.func private @sparse_csc(tensor) }> // CHECK-LABEL: func private @sparse_dcsc( -// CHECK-SAME: tensor (d1, d0)>, crdWidth = 64 }>>) +// CHECK-SAME: tensor (d1 : compressed, d0 : compressed), crdWidth = 64 }>>) func.func private @sparse_dcsc(tensor) // ----- @@ -59,7 +59,7 @@ func.func private @sparse_dcsc(tensor) }> // CHECK-LABEL: func private @sparse_coo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>) func.func private @sparse_coo(tensor) // ----- @@ -69,7 +69,7 @@ func.func private @sparse_coo(tensor) }> // CHECK-LABEL: func private @sparse_bcoo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>) func.func private @sparse_bcoo(tensor) // ----- @@ -79,7 +79,7 @@ func.func private @sparse_bcoo(tensor) }> // CHECK-LABEL: func private @sparse_sorted_coo( -// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed_nu", "singleton" ] }>>) +// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ 
map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) // ----- @@ -94,7 +94,7 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) }> // CHECK-LABEL: func private @sparse_bcsr( -// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed", "dense", "dense" ], dimToLvl = affine_map<(d0, d1) -> (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) @@ -105,7 +105,7 @@ func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) }> // CHECK-LABEL: func private @sparse_ell( -// CHECK-SAME: tensor (d0 * (s0 * 4), d0, d1)> }>> +// CHECK-SAME: tensor (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>> func.func private @sparse_ell(tensor) // ----- @@ -115,7 +115,7 @@ func.func private @sparse_ell(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -125,7 +125,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -138,7 +138,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_2_out_of_4( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : block2_4) }>> func.func private @sparse_2_out_of_4(tensor) // ----- @@ -153,7 +153,7 @@ func.func private @sparse_2_out_of_4(tensor) }> // CHECK-LABEL: func private @BCSR( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR(%arg0: tensor) { return } @@ -174,7 +174,7 @@ func.func private @BCSR(%arg0: tensor) { }> // CHECK-LABEL: func private @BCSR_explicit( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR_explicit(%arg0: tensor) { return } @@ -190,7 +190,7 @@ func.func private @BCSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0, d1 floordiv 4, d1 mod 4)> }>> +// CHECK-SAME: tensor (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4) }>> func.func private @NV_24(%arg0: tensor) { return } diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 7f8edac15302616..3a2376f75654af9 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -16,7 +16,7 @@ // CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -53,7 +53,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> 
tensor<10x10x // CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -99,7 +99,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-ROUND: return %[[E]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -142,7 +142,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-ROUND: return %[[C]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir index 9368cc71c5faa42..e0111c89df65a2d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir @@ -4,7 +4,7 @@ #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK: func.func @sparse_reshape( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C25:.*]] = arith.constant 25 : index // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index e1048edce184a51..6d15363fb17118d 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,95 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. 
- # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. + # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get( - casted.lvl_types, casted.dim_to_lvl, 8, 32 - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def 
testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" - ) + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + ) + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding >From 2be69066192995ff171e08a54f7c7fdd3e35ab44 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 18:39:17 +0000 Subject: [PATCH 2/4] format --- .../python/dialects/sparse_tensor/dialect.py | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index 6d15363fb17118d..d80b878323377a4 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,93 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. 
- # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. + # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - 
encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" + ) ) - ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding >From c7ee65a28b79ffdd45d068638775d5bcf7c20c29 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 22:44:39 +0000 Subject: [PATCH 3/4] update function name --- .../Transforms/SparseTensorCodegen.cpp | 20 +++++++++++++++++-- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 ++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index f02276fba0d526b..a470de8a72bed16 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -472,8 +472,11 @@ class SparseInsertGenerator llvm::raw_svector_ostream nameOstream(nameBuffer); nameOstream << kInsertFuncNamePrefix; const Level lvlRank = stt.getLvlRank(); - for (Level l = 0; l < lvlRank; l++) - nameOstream << toMLIRString(stt.getLvlType(l)) << "_"; + for (Level l = 0; l < lvlRank; l++) { + std::string lvlType = toMLIRString(stt.getLvlType(l)); + replaceWithUnderscore(lvlType); + nameOstream << lvlType << "_"; + } // Static dim sizes are used in the generated code while dynamic sizes are // loaded from the dimSizes buffer. This is the reason for adding the shape // to the function name. @@ -489,6 +492,19 @@ class SparseInsertGenerator private: TensorType rtp; + void replaceWithUnderscore(std::string &lvlType) { + for (auto it = lvlType.begin(); it != lvlType.end();) { + if (*it == '(') { + *it = '_'; + } else if (*it == ')' || *it == ' ') { + it = lvlType.erase(it); + continue; + } else if (*it == ',') { + *it = '_'; + } + it++; + } + } }; /// Generations insertion finalization code. 
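For reference, a standalone sketch (not part of the patch; sanitizeLvlType and the test driver are hypothetical names) of the renaming replaceWithUnderscore performs on the new level-type spellings, so the generated insert-function names stay valid identifiers:

#include <cassert>
#include <string>

// Same character rules as replaceWithUnderscore above:
// '(' and ',' become '_', while ')' and ' ' are removed.
static std::string sanitizeLvlType(std::string lvlType) {
  for (auto it = lvlType.begin(); it != lvlType.end();) {
    if (*it == '(' || *it == ',') {
      *it = '_';
    } else if (*it == ')' || *it == ' ') {
      it = lvlType.erase(it);
      continue;
    }
    ++it;
  }
  return lvlType;
}

int main() {
  // These outputs match the updated CHECK labels in codegen.mlir below.
  assert(sanitizeLvlType("compressed(nonordered)") == "compressed_nonordered");
  assert(sanitizeLvlType("compressed(nonunique, nonordered)") ==
         "compressed_nonunique_nonordered");
  return 0;
}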
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index c3b16807a7c18a6..6ba4769402d15cb 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 2329e0df37e9ae6d36f57de8113028f43f162ddc Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 23:01:57 +0000 Subject: [PATCH 4/4] make replace function more compact --- .../Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index a470de8a72bed16..0d076f6ef9d10ab 100644 --- 
a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -494,13 +494,11 @@ class SparseInsertGenerator TensorType rtp; void replaceWithUnderscore(std::string &lvlType) { for (auto it = lvlType.begin(); it != lvlType.end();) { - if (*it == '(') { + if (*it == '(' || *it == ',') { *it = '_'; } else if (*it == ')' || *it == ' ') { it = lvlType.erase(it); continue; - } else if (*it == ',') { - *it = '_'; } it++; } From lldb-commits at lists.llvm.org Wed Oct 4 08:36:58 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Wed, 04 Oct 2023 08:36:58 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] Print new syntax (PR #68130) In-Reply-To: Message-ID: <651d869a.170a0220.b89c6.a514@mx.google.com> https://github.com/yinying-lisa-li updated https://github.com/llvm/llvm-project/pull/68130 >From 47b34bb327e1078678d3ba0c96ebce3fc89cf2ae Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 16:43:50 +0000 Subject: [PATCH 1/5] [mlir][sparse] Print new syntax Printing changes from #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> to map = (d0) -> (d0 : compressed). Level properties, ELL and slice are also supported. --- .../mlir/Dialect/SparseTensor/IR/Enums.h | 20 +-- .../SparseTensor/IR/SparseTensorDialect.cpp | 64 ++++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 +- .../SparseTensor/roundtrip_encoding.mlir | 32 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 8 +- .../SparseTensor/sparse_tensor_reshape.mlir | 2 +- .../python/dialects/sparse_tensor/dialect.py | 160 +++++++++--------- 7 files changed, 159 insertions(+), 135 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index bc351ec52c0946b..2920ef79f461c6a 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -215,29 +215,29 @@ constexpr const char *toMLIRString(DimLevelType dlt) { case DimLevelType::Compressed: return "compressed"; case DimLevelType::CompressedNu: - return "compressed_nu"; + return "compressed(nonunique)"; case DimLevelType::CompressedNo: - return "compressed_no"; + return "compressed(nonordered)"; case DimLevelType::CompressedNuNo: - return "compressed_nu_no"; + return "compressed(nonunique, nonordered)"; case DimLevelType::Singleton: return "singleton"; case DimLevelType::SingletonNu: - return "singleton_nu"; + return "singleton(nonunique)"; case DimLevelType::SingletonNo: - return "singleton_no"; + return "singleton(nonordered)"; case DimLevelType::SingletonNuNo: - return "singleton_nu_no"; + return "singleton(nonunique, nonordered)"; case DimLevelType::LooseCompressed: return "loose_compressed"; case DimLevelType::LooseCompressedNu: - return "loose_compressed_nu"; + return "loose_compressed(nonunique)"; case DimLevelType::LooseCompressedNo: - return "loose_compressed_no"; + return "loose_compressed(nonordered)"; case DimLevelType::LooseCompressedNuNo: - return "loose_compressed_nu_no"; + return "loose_compressed(nonunique, nonordered)"; case DimLevelType::TwoOutOfFour: - return "compressed24"; + return "block2_4"; } return ""; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 3897e1b9ea3597c..4c8dccdda6c0c7c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -586,30 +586,56 
@@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { } void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { - // Print the struct-like storage in dictionary fashion. - printer << "<{ lvlTypes = [ "; - llvm::interleaveComma(getLvlTypes(), printer, [&](DimLevelType dlt) { - printer << "\"" << toMLIRString(dlt) << "\""; - }); - printer << " ]"; + auto map = static_cast(getDimToLvl()); + auto lvlTypes = getLvlTypes(); + // Empty affine map indicates identity map + if (!map) { + map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); + } + // Modified version of AsmPrinter::Impl::printAffineMap. + printer << "<{ map = "; + // Symbolic identifiers. + if (map.getNumSymbols() != 0) { + printer << '['; + for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) + printer << 's' << i << ", "; + if (map.getNumSymbols() >= 1) + printer << 's' << map.getNumSymbols() - 1; + printer << ']'; + } + // Dimension identifiers. + printer << '('; + auto dimSlices = getDimSlices(); + if (!dimSlices.empty()) { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << " : " << dimSlices[i] << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1 << " : " + << dimSlices[map.getNumDims() - 1]; + } else { + for (unsigned i = 0; i < map.getNumDims() - 1; ++i) + printer << 'd' << i << ", "; + if (map.getNumDims() >= 1) + printer << 'd' << map.getNumDims() - 1; + } + printer << ')'; + // Level format and properties. + printer << " -> ("; + for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { + map.getResult(i).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[i]) << ", "; + } + if (map.getNumResults() >= 1) { + auto lastIndex = map.getNumResults() - 1; + map.getResult(lastIndex).print(printer.getStream()); + printer << " : " << toMLIRString(lvlTypes[lastIndex]); + } + printer << ')'; // Print remaining members only for non-default values. - if (!isIdentity()) - printer << ", dimToLvl = affine_map<" << getDimToLvl() << ">"; if (getPosWidth()) printer << ", posWidth = " << getPosWidth(); if (getCrdWidth()) printer << ", crdWidth = " << getCrdWidth(); - if (!getDimSlices().empty()) { - printer << ", dimSlices = [ "; - llvm::interleaveComma(getDimSlices(), printer, - [&](SparseTensorDimSliceAttr attr) { - // Calls SparseTensorDimSliceAttr::print directly to - // skip mnemonic. 
- attr.print(printer); - }); - printer << " ]"; - } - printer << " }>"; } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 69a9c274a861ce1..c3b16807a7c18a6 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index 39e3ef102423524..c4ef50bee01ea2c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // CHECK-LABEL: func private @sparse_1d_tensor( -// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] 
}>>) +// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) // ----- @@ -13,7 +13,7 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map }> // CHECK-LABEL: func private @sparse_csr( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>) func.func private @sparse_csr(tensor) // ----- @@ -23,7 +23,7 @@ func.func private @sparse_csr(tensor) }> // CHECK-LABEL: func private @CSR_explicit( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : compressed) }>> func.func private @CSR_explicit(%arg0: tensor) { return } @@ -37,7 +37,7 @@ func.func private @CSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @sparse_csc( -// CHECK-SAME: tensor (d1, d0)> }>>) +// CHECK-SAME: tensor (d1 : dense, d0 : compressed) }>>) func.func private @sparse_csc(tensor) // ----- @@ -49,7 +49,7 @@ func.func private @sparse_csc(tensor) }> // CHECK-LABEL: func private @sparse_dcsc( -// CHECK-SAME: tensor (d1, d0)>, crdWidth = 64 }>>) +// CHECK-SAME: tensor (d1 : compressed, d0 : compressed), crdWidth = 64 }>>) func.func private @sparse_dcsc(tensor) // ----- @@ -59,7 +59,7 @@ func.func private @sparse_dcsc(tensor) }> // CHECK-LABEL: func private @sparse_coo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>) func.func private @sparse_coo(tensor) // ----- @@ -69,7 +69,7 @@ func.func private @sparse_coo(tensor) }> // CHECK-LABEL: func private @sparse_bcoo( -// CHECK-SAME: tensor>) +// CHECK-SAME: tensor (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>) func.func private @sparse_bcoo(tensor) // ----- @@ -79,7 +79,7 @@ func.func private @sparse_bcoo(tensor) }> // CHECK-LABEL: func private @sparse_sorted_coo( -// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed_nu", "singleton" ] }>>) +// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) // ----- @@ -94,7 +94,7 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) }> // CHECK-LABEL: func private @sparse_bcsr( -// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed", "dense", "dense" ], dimToLvl = affine_map<(d0, d1) -> (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) @@ -105,7 +105,7 @@ func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>) }> // CHECK-LABEL: func private @sparse_ell( -// CHECK-SAME: tensor (d0 * (s0 * 4), d0, d1)> }>> +// CHECK-SAME: tensor (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>> func.func private @sparse_ell(tensor) // ----- @@ -115,7 +115,7 @@ func.func private @sparse_ell(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -125,7 +125,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor, 
d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> func.func private @sparse_slice(tensor) // ----- @@ -138,7 +138,7 @@ func.func private @sparse_slice(tensor) }> // CHECK-LABEL: func private @sparse_2_out_of_4( -// CHECK-SAME: tensor> +// CHECK-SAME: tensor (d0 : dense, d1 : block2_4) }>> func.func private @sparse_2_out_of_4(tensor) // ----- @@ -153,7 +153,7 @@ func.func private @sparse_2_out_of_4(tensor) }> // CHECK-LABEL: func private @BCSR( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR(%arg0: tensor) { return } @@ -174,7 +174,7 @@ func.func private @BCSR(%arg0: tensor) { }> // CHECK-LABEL: func private @BCSR_explicit( -// CHECK-SAME: tensor (d0 floordiv 2, d1 floordiv 3, d0 mod 2, d1 mod 3)> }>> +// CHECK-SAME: tensor (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> func.func private @BCSR_explicit(%arg0: tensor) { return } @@ -190,7 +190,7 @@ func.func private @BCSR_explicit(%arg0: tensor) { }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0, d1 floordiv 4, d1 mod 4)> }>> +// CHECK-SAME: tensor (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4) }>> func.func private @NV_24(%arg0: tensor) { return } diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 7f8edac15302616..3a2376f75654af9 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -16,7 +16,7 @@ // CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -53,7 +53,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> // // CHECK-LABEL: func.func @sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -99,7 +99,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-ROUND: return %[[E]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_expand( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -142,7 +142,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-ROUND: return %[[C]] : tensor> // // CHECK-LABEL: func.func @dynamic_sparse_collapse( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir index 9368cc71c5faa42..e0111c89df65a2d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir +++ 
b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir @@ -4,7 +4,7 @@ #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK: func.func @sparse_reshape( -// CHECK-SAME: %[[S:.*]]: +// CHECK-SAME: %[[S:[a-zA-Z0-9_]*]]: // CHECK-DAG: %[[C25:.*]] = arith.constant 25 : index // CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index e1048edce184a51..6d15363fb17118d 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,95 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. - # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. 
+ # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get( - casted.lvl_types, casted.dim_to_lvl, 8, 32 - ) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ], dimToLvl = affine_map<(d0, d1) -> (d1, d0)>, posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" - ) + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ], posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + ) + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == 
encoding >From 2be69066192995ff171e08a54f7c7fdd3e35ab44 Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 18:39:17 +0000 Subject: [PATCH 2/5] format --- .../python/dialects/sparse_tensor/dialect.py | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index 6d15363fb17118d..d80b878323377a4 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -13,93 +13,93 @@ def run(f): # CHECK-LABEL: TEST: testEncodingAttr1D @run def testEncodingAttr1D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed)," - " posWidth = 16," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: None - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 16 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> - print(created) - # CHECK: created_equal: False - print(f"created_equal: {created == casted}") - - # Verify that the factory creates an instance of the proper type. - # CHECK: is_proper_instance: True - print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") - # CHECK: created_pos_width: 0 - print(f"created_pos_width: {created.pos_width}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed)," + " posWidth = 16," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 16, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: None + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 16 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, None, 0, 0) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + print(created) + # CHECK: created_equal: False + print(f"created_equal: {created == casted}") + + # Verify that the factory creates an instance of the proper type. 
+ # CHECK: is_proper_instance: True + print(f"is_proper_instance: {isinstance(created, st.EncodingAttr)}") + # CHECK: created_pos_width: 0 + print(f"created_pos_width: {created.pos_width}") # CHECK-LABEL: TEST: testEncodingAttr2D @run def testEncodingAttr2D(): - with Context() as ctx: - parsed = Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0, d1) -> (d1 : dense, d0 : compressed)," - " posWidth = 8," - " crdWidth = 32" - "}>" - ) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(parsed) - - casted = st.EncodingAttr(parsed) - # CHECK: equal: True - print(f"equal: {casted == parsed}") - - # CHECK: lvl_types: [, ] - print(f"lvl_types: {casted.lvl_types}") - # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) - print(f"dim_to_lvl: {casted.dim_to_lvl}") - # CHECK: pos_width: 8 - print(f"pos_width: {casted.pos_width}") - # CHECK: crd_width: 32 - print(f"crd_width: {casted.crd_width}") - - created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) - # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> - print(created) - # CHECK: created_equal: True - print(f"created_equal: {created == casted}") + with Context() as ctx: + parsed = Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0, d1) -> (d1 : dense, d0 : compressed)," + " posWidth = 8," + " crdWidth = 32" + "}>" + ) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(parsed) + + casted = st.EncodingAttr(parsed) + # CHECK: equal: True + print(f"equal: {casted == parsed}") + + # CHECK: lvl_types: [, ] + print(f"lvl_types: {casted.lvl_types}") + # CHECK: dim_to_lvl: (d0, d1) -> (d1, d0) + print(f"dim_to_lvl: {casted.dim_to_lvl}") + # CHECK: pos_width: 8 + print(f"pos_width: {casted.pos_width}") + # CHECK: crd_width: 32 + print(f"crd_width: {casted.crd_width}") + + created = st.EncodingAttr.get(casted.lvl_types, casted.dim_to_lvl, 8, 32) + # CHECK: #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed), posWidth = 8, crdWidth = 32 }> + print(created) + # CHECK: created_equal: True + print(f"created_equal: {created == casted}") # CHECK-LABEL: TEST: testEncodingAttrOnTensorType @run def testEncodingAttrOnTensorType(): - with Context() as ctx, Location.unknown(): - encoding = st.EncodingAttr( - Attribute.parse( - "#sparse_tensor.encoding<{" - " map = (d0) -> (d0 : compressed), " - " posWidth = 64," - " crdWidth = 32" - "}>" + with Context() as ctx, Location.unknown(): + encoding = st.EncodingAttr( + Attribute.parse( + "#sparse_tensor.encoding<{" + " map = (d0) -> (d0 : compressed), " + " posWidth = 64," + " crdWidth = 32" + "}>" + ) ) - ) - tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) - # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> - print(tt) - # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> - print(tt.encoding) - assert tt.encoding == encoding + tt = RankedTensorType.get((1024,), F32Type.get(), encoding=encoding) + # CHECK: tensor<1024xf32, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }>> + print(tt) + # CHECK: #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed), posWidth = 64, crdWidth = 32 }> + print(tt.encoding) + assert tt.encoding == encoding >From c7ee65a28b79ffdd45d068638775d5bcf7c20c29 Mon Sep 17 
00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 22:44:39 +0000 Subject: [PATCH 3/5] update function name --- .../Transforms/SparseTensorCodegen.cpp | 20 +++++++++++++++++-- mlir/test/Dialect/SparseTensor/codegen.mlir | 8 ++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index f02276fba0d526b..a470de8a72bed16 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -472,8 +472,11 @@ class SparseInsertGenerator llvm::raw_svector_ostream nameOstream(nameBuffer); nameOstream << kInsertFuncNamePrefix; const Level lvlRank = stt.getLvlRank(); - for (Level l = 0; l < lvlRank; l++) - nameOstream << toMLIRString(stt.getLvlType(l)) << "_"; + for (Level l = 0; l < lvlRank; l++) { + std::string lvlType = toMLIRString(stt.getLvlType(l)); + replaceWithUnderscore(lvlType); + nameOstream << lvlType << "_"; + } // Static dim sizes are used in the generated code while dynamic sizes are // loaded from the dimSizes buffer. This is the reason for adding the shape // to the function name. @@ -489,6 +492,19 @@ class SparseInsertGenerator private: TensorType rtp; + void replaceWithUnderscore(std::string &lvlType) { + for (auto it = lvlType.begin(); it != lvlType.end();) { + if (*it == '(') { + *it = '_'; + } else if (*it == ')' || *it == ' ') { + it = lvlType.erase(it); + continue; + } else if (*it == ',') { + *it = '_'; + } + it++; + } + } }; /// Generations insertion finalization code. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index c3b16807a7c18a6..6ba4769402d15cb 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @"_insert_dense_compressed(nonordered)_8_8_f64_0_0"(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : 
tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @"_insert_compressed(nonunique)_singleton_5_6_f64_0_0"(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 2329e0df37e9ae6d36f57de8113028f43f162ddc Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Tue, 3 Oct 2023 23:01:57 +0000 Subject: [PATCH 4/5] make replace function more compact --- .../Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index a470de8a72bed16..0d076f6ef9d10ab 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -494,13 +494,11 @@ class SparseInsertGenerator TensorType rtp; void replaceWithUnderscore(std::string &lvlType) { for (auto it = lvlType.begin(); it != lvlType.end();) { - if (*it == '(') { + if (*it == '(' || *it == ',') { *it = '_'; } else if (*it == ')' || *it == ' ') { it = lvlType.erase(it); continue; - } else if (*it == ',') { - *it = '_'; } it++; } >From 0d628f56229a8e2225b3222c60aa04c549d3a08c Mon Sep 17 00:00:00 2001 From: Yinying Li Date: Wed, 4 Oct 2023 15:35:30 +0000 Subject: [PATCH 5/5] address reivew comments --- .../SparseTensor/IR/SparseTensorAttrDefs.td | 8 ++++ .../SparseTensor/IR/SparseTensorDialect.cpp | 42 ++++++++++++------- .../Transforms/SparseTensorCodegen.cpp | 19 ++++----- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td index 4e38f314a27391d..a3fe938a4af3d89 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td @@ -422,6 +422,14 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding", std::optional getStaticLvlSliceOffset(::mlir::sparse_tensor::Level lvl) const; std::optional getStaticLvlSliceSize(::mlir::sparse_tensor::Level lvl) const; std::optional getStaticLvlSliceStride(::mlir::sparse_tensor::Level lvl) const; + + // + // Printing methods. 
+ // + + void printSymbol(AffineMap &map, AsmPrinter &printer) const; + void printDimension(AffineMap &map, AsmPrinter &printer, ArrayRef<::mlir::sparse_tensor::SparseTensorDimSliceAttr> dimSlices) const; + void printLevel(AffineMap &map, AsmPrinter &printer, ArrayRef<::mlir::sparse_tensor::DimLevelType> lvlTypes) const; }]; let genVerifyDecl = 1; diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 4c8dccdda6c0c7c..fa4c366d03bf43f 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -587,14 +587,27 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { auto map = static_cast(getDimToLvl()); - auto lvlTypes = getLvlTypes(); // Empty affine map indicates identity map if (!map) { map = AffineMap::getMultiDimIdentityMap(getLvlTypes().size(), getContext()); } - // Modified version of AsmPrinter::Impl::printAffineMap. printer << "<{ map = "; - // Symbolic identifiers. + printSymbol(map, printer); + printer << '('; + printDimension(map, printer, getDimSlices()); + printer << ") -> ("; + printLevel(map, printer, getLvlTypes()); + printer << ')'; + // Print remaining members only for non-default values. + if (getPosWidth()) + printer << ", posWidth = " << getPosWidth(); + if (getCrdWidth()) + printer << ", crdWidth = " << getCrdWidth(); + printer << " }>"; +} + +void SparseTensorEncodingAttr::printSymbol(AffineMap &map, + AsmPrinter &printer) const { if (map.getNumSymbols() != 0) { printer << '['; for (unsigned i = 0; i < map.getNumSymbols() - 1; ++i) @@ -603,9 +616,11 @@ void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { printer << 's' << map.getNumSymbols() - 1; printer << ']'; } - // Dimension identifiers. - printer << '('; - auto dimSlices = getDimSlices(); +} + +void SparseTensorEncodingAttr::printDimension( + AffineMap &map, AsmPrinter &printer, + ArrayRef dimSlices) const { if (!dimSlices.empty()) { for (unsigned i = 0; i < map.getNumDims() - 1; ++i) printer << 'd' << i << " : " << dimSlices[i] << ", "; @@ -618,9 +633,11 @@ void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { if (map.getNumDims() >= 1) printer << 'd' << map.getNumDims() - 1; } - printer << ')'; - // Level format and properties. - printer << " -> ("; +} + +void SparseTensorEncodingAttr::printLevel( + AffineMap &map, AsmPrinter &printer, + ArrayRef lvlTypes) const { for (unsigned i = 0; i < map.getNumResults() - 1; ++i) { map.getResult(i).print(printer.getStream()); printer << " : " << toMLIRString(lvlTypes[i]) << ", "; @@ -630,13 +647,6 @@ void SparseTensorEncodingAttr::print(AsmPrinter &printer) const { map.getResult(lastIndex).print(printer.getStream()); printer << " : " << toMLIRString(lvlTypes[lastIndex]); } - printer << ')'; - // Print remaining members only for non-default values. 
- if (getPosWidth()) - printer << ", posWidth = " << getPosWidth(); - if (getCrdWidth()) - printer << ", crdWidth = " << getCrdWidth(); - printer << " }>"; } LogicalResult diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 0d076f6ef9d10ab..80abc3d602a0cf1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -474,7 +474,13 @@ class SparseInsertGenerator const Level lvlRank = stt.getLvlRank(); for (Level l = 0; l < lvlRank; l++) { std::string lvlType = toMLIRString(stt.getLvlType(l)); - replaceWithUnderscore(lvlType); + // Replace/remove punctuations in level properties. + std::replace_if( + lvlType.begin(), lvlType.end(), + [](char c) { return c == '(' || c == ','; }, '_'); + lvlType.erase(std::remove_if(lvlType.begin(), lvlType.end(), + [](char c) { return c == ')' || c == ' '; }), + lvlType.end()); nameOstream << lvlType << "_"; } // Static dim sizes are used in the generated code while dynamic sizes are @@ -492,17 +498,6 @@ class SparseInsertGenerator private: TensorType rtp; - void replaceWithUnderscore(std::string &lvlType) { - for (auto it = lvlType.begin(); it != lvlType.end();) { - if (*it == '(' || *it == ',') { - *it = '_'; - } else if (*it == ')' || *it == ' ') { - it = lvlType.erase(it); - continue; - } - it++; - } - } }; /// Generations insertion finalization code.

From lldb-commits at lists.llvm.org Wed Oct 4 09:01:59 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Wed, 04 Oct 2023 09:01:59 -0700 (PDT)
Subject: [Lldb-commits] [lldb] Bugfix for choosing the correct deduction guide (PR #66487)
Message-ID: <651d8c77.170a0220.d8e8d.a915@mx.google.com>

Botond István Hprváth, Botond István Hprváth
Message-ID:
In-Reply-To:

https://github.com/whisperity edited https://github.com/llvm/llvm-project/pull/66487

From lldb-commits at lists.llvm.org Wed Oct 4 09:04:20 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Wed, 04 Oct 2023 09:04:20 -0700 (PDT)
Subject: [Lldb-commits] [lldb] 19141c4 - [lldb] Mark operator== const to avoid ambiguity in C++20. (#68224)
Message-ID: <651d8d04.170a0220.500f7.a6ab@mx.google.com>

Author: Samira Bazuzi
Date: 2023-10-04T09:04:13-07:00
New Revision: 19141c4172ea5a4979fa0419dcf712d3f0cadefc
URL: https://github.com/llvm/llvm-project/commit/19141c4172ea5a4979fa0419dcf712d3f0cadefc
DIFF: https://github.com/llvm/llvm-project/commit/19141c4172ea5a4979fa0419dcf712d3f0cadefc.diff

LOG: [lldb] Mark operator== const to avoid ambiguity in C++20. (#68224)

C++20 will automatically generate an operator== with reversed operand order, which is ambiguous with the written operator== when one argument is marked const and the other isn't. These operators currently trigger -Wambiguous-reversed-operator at usage sites lldb/source/Symbol/SymbolFileOnDemand.cpp:68 and lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp:1286.
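A minimal, self-contained illustration of that ambiguity (not part of the commit; the struct and function names here are hypothetical):

struct S {
  int v;
  // Non-const: in C++20, a == b considers both a.operator==(b) and the
  // reversed candidate b.operator==(a), which differ only in which operand
  // supplies the implicit object parameter, so clang emits
  // -Wambiguous-reversed-operator.
  bool operator==(const S &other) { return v == other.v; }
  // The fix this commit applies at each site: mark the operator const, e.g.
  //   bool operator==(const S &other) const { return v == other.v; }
};

bool compare(S &a, S &b) { return a == b; } // warns under -std=c++20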
Added: Modified: lldb/include/lldb/Utility/XcodeSDK.h lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h lldb/source/Utility/XcodeSDK.cpp Removed: ################################################################################ diff --git a/lldb/include/lldb/Utility/XcodeSDK.h b/lldb/include/lldb/Utility/XcodeSDK.h index 878b131a1814536..f8528995d549c9c 100644 --- a/lldb/include/lldb/Utility/XcodeSDK.h +++ b/lldb/include/lldb/Utility/XcodeSDK.h @@ -69,7 +69,7 @@ class XcodeSDK { XcodeSDK &operator=(const XcodeSDK &other); XcodeSDK(const XcodeSDK&) = default; - bool operator==(const XcodeSDK &other); + bool operator==(const XcodeSDK &other) const; /// Return parsed SDK type and version number. Info Parse() const; diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp index 378b2472278605d..5aeaf3ae24d7c7b 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp @@ -606,8 +606,8 @@ void DynamicLoaderDarwinKernel::KextImageInfo::SetProcessStopId( m_load_process_stop_id = stop_id; } -bool DynamicLoaderDarwinKernel::KextImageInfo:: -operator==(const KextImageInfo &rhs) { +bool DynamicLoaderDarwinKernel::KextImageInfo::operator==( + const KextImageInfo &rhs) const { if (m_uuid.IsValid() || rhs.GetUUID().IsValid()) { return m_uuid == rhs.GetUUID(); } diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h index 38a60d154820a96..000c382b2c01117 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.h @@ -176,7 +176,7 @@ class DynamicLoaderDarwinKernel : public lldb_private::DynamicLoader { void SetProcessStopId(uint32_t stop_id); - bool operator==(const KextImageInfo &rhs); + bool operator==(const KextImageInfo &rhs) const; uint32_t GetAddressByteSize(); // as determined by Mach-O header diff --git a/lldb/source/Utility/XcodeSDK.cpp b/lldb/source/Utility/XcodeSDK.cpp index 84f3ccbd01e2d07..154ddbebe8b30d5 100644 --- a/lldb/source/Utility/XcodeSDK.cpp +++ b/lldb/source/Utility/XcodeSDK.cpp @@ -56,7 +56,7 @@ XcodeSDK::XcodeSDK(XcodeSDK::Info info) : m_name(GetName(info.type).str()) { XcodeSDK &XcodeSDK::operator=(const XcodeSDK &other) = default; -bool XcodeSDK::operator==(const XcodeSDK &other) { +bool XcodeSDK::operator==(const XcodeSDK &other) const { return m_name == other.m_name; } From lldb-commits at lists.llvm.org Wed Oct 4 09:04:20 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Wed, 04 Oct 2023 09:04:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Mark operator== const to avoid ambiguity in C++20. 
(PR #68224)
In-Reply-To:
Message-ID: <651d8d04.170a0220.85327.48dc@mx.google.com>

https://github.com/JDevlieghere closed https://github.com/llvm/llvm-project/pull/68224

From lldb-commits at lists.llvm.org Wed Oct 4 09:04:41 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Wed, 04 Oct 2023 09:04:41 -0700 (PDT)
Subject: [Lldb-commits] [lldb] Bugfix for choosing the correct deduction guide (PR #66487)
Message-ID: <651d8d19.170a0220.713cf.3da0@mx.google.com>

Botond István Hprváth, Botond István Hprváth
Message-ID:
In-Reply-To:

https://github.com/whisperity commented:

LGTM with previous discussion. I will do the commit... On the UI... :slightly_smiling_face:

https://github.com/llvm/llvm-project/pull/66487

From lldb-commits at lists.llvm.org Wed Oct 4 09:05:01 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Wed, 04 Oct 2023 09:05:01 -0700 (PDT)
Subject: [Lldb-commits] [lldb] Bugfix for choosing the correct deduction guide (PR #66487)
Message-ID: <651d8d2d.170a0220.cf57c.af42@mx.google.com>

Botond István Hprváth, Botond István Hprváth
Message-ID:
In-Reply-To:

https://github.com/whisperity approved this pull request.

> TIL there is no way to resign a review here like there was on Phab...

https://github.com/llvm/llvm-project/pull/66487

From lldb-commits at lists.llvm.org Wed Oct 4 09:05:48 2023
From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits)
Date: Wed, 04 Oct 2023 09:05:48 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Expose Platform::Attach through the SB API (PR #68050)
In-Reply-To:
Message-ID: <651d8d5c.170a0220.b6166.3f6c@mx.google.com>

JDevlieghere wrote:

Thank you @antmox and @DavidSpickett for following up on this.

https://github.com/llvm/llvm-project/pull/68050

From lldb-commits at lists.llvm.org Wed Oct 4 09:08:07 2023
From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits)
Date: Wed, 04 Oct 2023 09:08:07 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231)
Message-ID:

https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/68231

Currently this non-trivial calculation is repeated multiple times, making it hard to reason about when the `byte_offset`/`member_byte_offset` is being set or not. This patch simply moves all those instances of the same calculation into a helper function. We return an optional to remain an NFC patch. Default initializing the offset would make sense but requires further analysis and can be done in a follow-up patch.

>From 6fd2c0e25ea49f4b9a2ca0ad645811910cd9b1f8 Mon Sep 17 00:00:00 2001
From: Michael Buch
Date: Wed, 4 Oct 2023 16:57:22 +0100
Subject: [PATCH] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic

Currently this non-trivial calculation is repeated multiple times, making it hard to reason about when the `byte_offset`/`member_byte_offset` is being set or not. This patch simply moves all those instances of the same calculation into a helper function. We return an optional to remain an NFC patch. Default initializing the offset would make sense but requires further analysis and can be done in a follow-up patch.
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 126 ++++++------------ 1 file changed, 39 insertions(+), 87 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..005711d6f488c7f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -519,6 +519,33 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +static std::optional +ExtractDataMemberLocation(DWARFDIE const &die, DWARFFormValue const &form_value, + ModuleSP module_sp) { + // With DWARF 3 and later, if the value is an integer constant, + // this form value is the offset in bytes from the beginning of + // the containing entity. + if (!form_value.BlockData()) + return form_value.Unsigned(); + + Value initialValue(0); + Value memberOffset(0); + const DWARFDataExtractor &debug_info_data = die.GetData(); + uint32_t block_length = form_value.Unsigned(); + uint32_t block_offset = + form_value.BlockData() - debug_info_data.GetDataStart(); + if (!DWARFExpression::Evaluate( + nullptr, // ExecutionContext * + nullptr, // RegisterContext * + module_sp, DataExtractor(debug_info_data, block_offset, block_length), + die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, memberOffset, + nullptr)) { + return {}; + } + + return memberOffset.ResolveValue(nullptr).UInt(); +} + lldb::TypeSP DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const DWARFDIE &die, @@ -1406,26 +1433,9 @@ void DWARFASTParserClang::ParseInheritance( encoding_form = form_value; break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, nullptr, module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. 
- member_byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + member_byte_offset = *maybe_offset; break; case DW_AT_accessibility: @@ -2557,29 +2567,9 @@ VariantMember::VariantMember(DWARFDIE &die, lldb::ModuleSP module_sp) { break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, - block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + byte_offset = *maybe_offset; break; default: @@ -2608,28 +2598,9 @@ DiscriminantValue::DiscriminantValue(const DWARFDIE &die, ModuleSP module_sp) { type_ref = form_value; break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + byte_offset = *maybe_offset; break; default: break; @@ -2686,28 +2657,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, data_bit_offset = form_value.Unsigned(); break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. 
- member_byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + member_byte_offset = *maybe_offset; break; case DW_AT_accessibility: From lldb-commits at lists.llvm.org Wed Oct 4 09:10:47 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 09:10:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651d8e87.630a0220.3303f.97cd@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes Currently this non-trivial calculation is repeated multiple times, making it hard to reason about when the `byte_offset`/`member_byte_offset` is being set or not. This patch simply moves all those instances of the same calculation into a helper function. We return an optional to remain an NFC patch. Default initializing the offset would make sense but requires further analysis and can be done in a follow-up patch. --- Full diff: https://github.com/llvm/llvm-project/pull/68231.diff 1 Files Affected: - (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp (+39-87) ``````````diff diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..005711d6f488c7f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -519,6 +519,33 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +static std::optional +ExtractDataMemberLocation(DWARFDIE const &die, DWARFFormValue const &form_value, + ModuleSP module_sp) { + // With DWARF 3 and later, if the value is an integer constant, + // this form value is the offset in bytes from the beginning of + // the containing entity. + if (!form_value.BlockData()) + return form_value.Unsigned(); + + Value initialValue(0); + Value memberOffset(0); + const DWARFDataExtractor &debug_info_data = die.GetData(); + uint32_t block_length = form_value.Unsigned(); + uint32_t block_offset = + form_value.BlockData() - debug_info_data.GetDataStart(); + if (!DWARFExpression::Evaluate( + nullptr, // ExecutionContext * + nullptr, // RegisterContext * + module_sp, DataExtractor(debug_info_data, block_offset, block_length), + die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, memberOffset, + nullptr)) { + return {}; + } + + return memberOffset.ResolveValue(nullptr).UInt(); +} + lldb::TypeSP DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const DWARFDIE &die, @@ -1406,26 +1433,9 @@ void DWARFASTParserClang::ParseInheritance( encoding_form = form_value; break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, nullptr, module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. 
- member_byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + member_byte_offset = *maybe_offset; break; case DW_AT_accessibility: @@ -2557,29 +2567,9 @@ VariantMember::VariantMember(DWARFDIE &die, lldb::ModuleSP module_sp) { break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, - block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + byte_offset = *maybe_offset; break; default: @@ -2608,28 +2598,9 @@ DiscriminantValue::DiscriminantValue(const DWARFDIE &die, ModuleSP module_sp) { type_ref = form_value; break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + byte_offset = *maybe_offset; break; default: break; @@ -2686,28 +2657,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, data_bit_offset = form_value.Unsigned(); break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - member_byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + member_byte_offset = *maybe_offset; break; case DW_AT_accessibility: ``````````
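The refactor above has a shape worth noting beyond lldb: a repeated, branch-heavy computation is hoisted into a single static helper that returns `std::optional`, so each call site assigns only on success and the change stays NFC. A minimal, self-contained sketch of that pattern — the `FormValue` struct and its fields below are illustrative stand-ins, not lldb's real `DWARFFormValue`/`DWARFDIE` API:

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Stand-in for DWARFFormValue: either a direct integer constant or an
// expression blob whose evaluation may fail (hypothetical fields).
struct FormValue {
  bool has_block;     // plays the role of form_value.BlockData() != nullptr
  uint64_t constant;  // plays the role of form_value.Unsigned()
  bool eval_ok;       // whether "expression evaluation" would succeed
};

// One helper owns the branching; callers no longer duplicate it.
// Returning std::optional keeps the behavior NFC: on failure the
// caller's previously set offset is simply left untouched.
static std::optional<uint64_t> ExtractDataMemberLocation(const FormValue &fv) {
  if (!fv.has_block)   // DWARF 3+: an integer constant is the byte offset
    return fv.constant;
  if (!fv.eval_ok)     // evaluation of the location expression failed
    return std::nullopt;
  return fv.constant;  // pretend this is the evaluated offset
}

int main() {
  uint64_t member_byte_offset = 0; // caller-side default stays as-is
  FormValue fv{/*has_block=*/true, /*constant=*/8, /*eval_ok=*/true};
  if (auto maybe_offset = ExtractDataMemberLocation(fv))
    member_byte_offset = *maybe_offset; // assign only on success
  std::cout << member_byte_offset << '\n'; // prints 8
}
```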
https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Wed Oct 4 09:11:30 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 09:11:30 -0700 (PDT) Subject: [Lldb-commits] [lldb] [clang] Choose non-templated ctor as deduction guide unambiguously (PR #66487) Message-ID: <651d8eb2.170a0220.be6a8.ad89@mx.google.com> Botond István Hprváth, Botond István Hprváth Message-ID: In-Reply-To: https://github.com/whisperity edited https://github.com/llvm/llvm-project/pull/66487 From lldb-commits at lists.llvm.org Wed Oct 4 09:19:28 2023 From: lldb-commits at lists.llvm.org (Erich Keane via lldb-commits) Date: Wed, 04 Oct 2023 09:19:28 -0700 (PDT) Subject: [Lldb-commits] [lldb] [clang] Choose non-templated ctor as deduction guide unambiguously (PR #66487) Message-ID: <651d9090.170a0220.49dce.a729@mx.google.com> Botond István Hprváth, Botond István Hprváth Message-ID: In-Reply-To: erichkeane wrote: Ah, I see, apologies. I thought you meant the resign/etc complaint meant you couldn't commit through the UI. https://github.com/llvm/llvm-project/pull/66487 From lldb-commits at lists.llvm.org Wed Oct 4 09:37:25 2023 From: lldb-commits at lists.llvm.org (Felipe de Azevedo Piovezan via lldb-commits) Date: Wed, 04 Oct 2023 09:37:25 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651d94c5.170a0220.1a838.4eb8@mx.google.com> felipepiovezan wrote: ![image](https://github.com/llvm/llvm-project/assets/5406686/f95b5fa5-8aa6-45fc-9996-6dea8d97a4d6) https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Wed Oct 4 09:41:56 2023 From: lldb-commits at lists.llvm.org (Felipe de Azevedo Piovezan via lldb-commits) Date: Wed, 04 Oct 2023 09:41:56 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651d95d4.170a0220.94571.4511@mx.google.com> felipepiovezan wrote: this must have felt good to fix! https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Wed Oct 4 09:44:18 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Wed, 04 Oct 2023 09:44:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651d9662.170a0220.14cdc.afda@mx.google.com> ================ @@ -519,6 +519,33 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +static std::optional ---------------- Michael137 wrote: The integer types are a bit inconsistent (but this didn't change with this patch). * `ParseInheritance` stores the value in an `off_t`. Then passes it to `clang::CharUnits::fromQuantity` which takes an `int64_t` * The Rust `Variant` support stores offsets as uint32_t, and so does `MemberAttributes`. But `FormValue::Unsigned()` returns a uint64_t.
Would be nice to make this consistent eventually https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Wed Oct 4 09:44:28 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Wed, 04 Oct 2023 09:44:28 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651d966c.170a0220.487ac.5244@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Wed Oct 4 09:56:50 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 09:56:50 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb-vscode] Update installation instructions (PR #68234) Message-ID: https://github.com/walter-erquinigo created https://github.com/llvm/llvm-project/pull/68234 lldb-vscode had installation instructions based on creating a folder inside ~/.vscode/extensions, which no longer works. A different installation mechanism is needed based on a VSCode command. More can be read in the contents of this patch. Closes https://github.com/llvm/llvm-project/issues/63655 >From 8eafc95bc1a3804557adab55db997cb6ac3d37e5 Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Wed, 4 Oct 2023 12:55:16 -0400 Subject: [PATCH] [lldb-vscode] Update installation instructions lldb-vscode had installation instructions based on creating a folder inside ~/.vscode/extensions, which no longer works. A different installation mechanism is needed based on a VSCode command. More can be read in the contents of this patch. Closes https://github.com/llvm/llvm-project/issues/63655 --- lldb/tools/lldb-vscode/README.md | 115 ++++++++++++++++--------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/lldb/tools/lldb-vscode/README.md b/lldb/tools/lldb-vscode/README.md index 154ccefc5f59798..6f930293126d53e 100644 --- a/lldb/tools/lldb-vscode/README.md +++ b/lldb/tools/lldb-vscode/README.md @@ -1,18 +1,20 @@ # Table of Contents -- [Introduction](#Introduction) -- [Installation](#Installation-Visual-Studio-Code) +- [Table of Contents](#table-of-contents) +- [Introduction](#introduction) +- [Installation for Visual Studio Code](#installation-for-visual-studio-code) - [Configurations](#configurations) - - [Launch Configuration Settings](#launch-configuration-settings) - - [Attach Configuration Settings](#attach-configuration-settings) - - [Example configurations](#example-configurations) - - [Launching](#launching) - - [Attach to process using process ID](#attach-using-pid) - - [Attach to process by name](#attach-by-name) - - [Loading a core file](#loading-a-core-file) -- [Custom Debugger Commands](#custom-debugger-commands) - - [startDebugging](#startDebugging) + - [Launch Configuration Settings](#launch-configuration-settings) + - [Attaching Settings](#attaching-settings) + - [Example configurations](#example-configurations) + - [Launching](#launching) + - [Attach using PID](#attach-using-pid) + - [Attach by Name](#attach-by-name) + - [Loading a Core File](#loading-a-core-file) +- [Custom debugger commands](#custom-debugger-commands) + - [startDebugging](#startdebugging) + - [repl-mode](#repl-mode) # Introduction @@ -24,52 +26,57 @@ get a full featured debugger with a well defined protocol. 
# Installation for Visual Studio Code -Installing the plug-in involves creating a directory in the `~/.vscode/extensions` folder and copying the package.json file that is in the same directory as this -documentation into it, and copying to symlinking a lldb-vscode binary into -the `bin` directory inside the plug-in directory. - -If you want to make a stand alone plug-in that you can send to others on unix systems: - -``` -$ mkdir -p ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp package.json ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 -$ cd ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp /path/to/a/built/lldb-vscode . -$ cp /path/to/a/built/liblldb.so . +Installing the plug-in involves creating a directory in any location outside of +`~/.vscode/extensions`. For example, `~/vscode-lldb` is a valid one. You'll also +need a subfolder `bin`, e.g. `~/vscode-lldb/bin`. Then copy the `package.json` +file that is in the same directory as this documentation into it, and symlink +the `lldb-vscode` binary into the `bin` directory inside the plug-in directory. + +Finally, on VS Code, execute the command +`Developer: Install Extension from Location` and pick the folder you just +created, which would be `~/vscode-lldb` following the example above. + +If you want to make a stand alone plug-in that you can send to others on UNIX +systems: + +```bash +mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin +cp package.json ~/llvm-org.lldb-vscode-0.1.0 +cd ~/llvm-org.lldb-vscode-0.1.0/bin +cp /path/to/a/built/lldb-vscode . +cp /path/to/a/built/liblldb.so . ``` -It is important to note that the directory `~/.vscode/extensions` works for users logged in locally to the machine. If you are remoting into the box using Visual Studio Code's Remote plugins (SSH, WSL, Docker) it will look for extensions on `~/.vscode-server/extensions` only and you will not see your just installed lldb-vscode plug-in. If you want this plugin to be visible to remoting users, you will need to either repeat the process above for the `~/.vscode-server` folder or create a symbolic link from it to `~/.vscode/extensions`: +If you want to make a stand alone plug-in that you can send to others on macOS +systems: -``` -$ cd ~/.vscode-server/extensions -$ ln -s ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 llvm-org.lldb-vscode-0.1.0 +```bash +mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin +cp package.json ~/llvm-org.lldb-vscode-0.1.0 +cd ~/llvm-org.lldb-vscode-0.1.0/bin +cp /path/to/a/built/lldb-vscode . +rsync -av /path/to/a/built/LLDB.framework LLDB.framework ``` -If you want to make a stand alone plug-in that you can send to others on macOS systems: - -``` -$ mkdir -p ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp package.json ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 -$ cd ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp /path/to/a/built/lldb-vscode . -$ rsync -av /path/to/a/built/LLDB.framework LLDB.framework +You might need to create additional directories for the `liblldb.so` or +`LLDB.framework` inside or next to the `bin` folder depending on how the +[rpath](https://en.wikipedia.org/wiki/Rpath) is set in your `lldb-vscode` +binary. By default the `Debug` builds of LLDB usually includes +the current executable directory in the rpath, so these steps should work for +most people. 
+ +To create a plug-in that symlinks into your `lldb-vscode` in your build +directory: + +```bash +mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin +cp package.json ~/llvm-org.lldb-vscode-0.1.0 +cd ~/llvm-org.lldb-vscode-0.1.0/bin +ln -s /path/to/a/built/lldb-vscode ``` -You might need to create additional directories for the `liblldb.so` or `LLDB.framework` inside or next to the `bin` folder depending on how the [rpath](https://en.wikipedia.org/wiki/Rpath) is set in your `lldb-vscode` binary. By default the `Debug` builds of LLDB usually includes -the current executable directory in the rpath, so these steps should work for most people. - -To create a plug-in that symlinks into your `lldb-vscode` in your build directory: - -``` -$ mkdir -p ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp package.json ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 -$ cd ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ ln -s /path/to/a/built/lldb-vscode -``` - -This is handy if you want to debug and develope the `lldb-vscode` executable when adding features or fixing bugs. - - +This is handy if you want to debug and develope the `lldb-vscode` executable +when adding features or fixing bugs. # Configurations @@ -127,7 +134,6 @@ The JSON configuration file can contain the following `lldb-vscode` specific lau |**terminateCommands** |[string]| | LLDB commands executed when the debugging session ends. Commands and command output will be sent to the debugger console when they are executed. |**attachCommands** |[string]| | LLDB commands that will be executed after **preRunCommands** which take place of the code that normally does the attach. The commands can create a new target and attach or launch it however desired. This allows custom launch and attach configurations. Core files can use `target create --core /path/to/core` to attach to core files. - ## Example configurations ### Launching @@ -191,7 +197,6 @@ to be launched you can add the "waitFor" key value pair: This will work as long as the architecture, vendor and OS supports waiting for processes. Currently MacOS is the only platform that supports this. - ### Loading a Core File This loads the coredump file `/cores/123.core` associated with the program @@ -242,12 +247,12 @@ This will launch a server and then request a child debug session for a client. Inspect or adjust the behavior of lldb-vscode repl evaluation requests. The supported modes are `variable`, `command` and `auto`. -* `variable` - Variable mode expressions are evaluated in the context of the +- `variable` - Variable mode expressions are evaluated in the context of the current frame. Use a `\`` prefix on the command to run an lldb command. -* `command` - Command mode expressions are evaluated as lldb commands, as a +- `command` - Command mode expressions are evaluated as lldb commands, as a result, values printed by lldb are always stringified representations of the expression output. -* `auto` - Auto mode will attempt to infer if the expression represents an lldb +- `auto` - Auto mode will attempt to infer if the expression represents an lldb command or a variable expression. A heuristic is used to infer if the input represents a variable or a command. Use a `\`` prefix to ensure an expression is evaluated as a command. 
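The repl-mode section in the README above documents behavior rather than implementation. As a rough sketch of that dispatch — the `Mode` enum, the function name, and the whitespace heuristic are assumptions for illustration, not lldb-vscode's actual code:

```cpp
#include <iostream>
#include <string>

enum class Mode { Variable, Command, Auto };

// Toy classifier mirroring the documented behavior: a leading backtick
// always forces an lldb command; Variable/Command modes are fixed; Auto
// falls back to a guess. The guess used here (contains a space => treat
// as a command) is purely illustrative.
static bool TreatAsCommand(Mode mode, const std::string &input) {
  if (!input.empty() && input.front() == '`') // explicit escape hatch
    return true;
  switch (mode) {
  case Mode::Variable:
    return false;
  case Mode::Command:
    return true;
  case Mode::Auto:
    return input.find(' ') != std::string::npos;
  }
  return false;
}

int main() {
  std::cout << TreatAsCommand(Mode::Auto, "`bt") << '\n';             // 1
  std::cout << TreatAsCommand(Mode::Auto, "argc") << '\n';            // 0
  std::cout << TreatAsCommand(Mode::Auto, "breakpoint list") << '\n'; // 1
}
```

The backtick prefix acting as an unconditional escape hatch is the one rule the README states outright; everything else in `auto` mode is heuristic by design.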
From lldb-commits at lists.llvm.org Wed Oct 4 09:57:45 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 09:57:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb-vscode] Update installation instructions (PR #68234) In-Reply-To: Message-ID: <651d9989.a70a0220.466fe.9bcb@mx.google.com> https://github.com/walter-erquinigo ready_for_review https://github.com/llvm/llvm-project/pull/68234 From lldb-commits at lists.llvm.org Wed Oct 4 09:58:53 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 09:58:53 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb-vscode] Update installation instructions (PR #68234) In-Reply-To: Message-ID: <651d99cd.a70a0220.4338b.9e8a@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes lldb-vscode had installation instructions based on creating a folder inside ~/.vscode/extensions, which no longer works. A different installation mechanism is needed based on a VSCode command. More can be read in the contents of this patch. Closes https://github.com/llvm/llvm-project/issues/63655 --- Full diff: https://github.com/llvm/llvm-project/pull/68234.diff 1 Files Affected: - (modified) lldb/tools/lldb-vscode/README.md (+60-55) ``````````diff diff --git a/lldb/tools/lldb-vscode/README.md b/lldb/tools/lldb-vscode/README.md index 154ccefc5f59798..6f930293126d53e 100644 --- a/lldb/tools/lldb-vscode/README.md +++ b/lldb/tools/lldb-vscode/README.md @@ -1,18 +1,20 @@ # Table of Contents -- [Introduction](#Introduction) -- [Installation](#Installation-Visual-Studio-Code) +- [Table of Contents](#table-of-contents) +- [Introduction](#introduction) +- [Installation for Visual Studio Code](#installation-for-visual-studio-code) - [Configurations](#configurations) - - [Launch Configuration Settings](#launch-configuration-settings) - - [Attach Configuration Settings](#attach-configuration-settings) - - [Example configurations](#example-configurations) - - [Launching](#launching) - - [Attach to process using process ID](#attach-using-pid) - - [Attach to process by name](#attach-by-name) - - [Loading a core file](#loading-a-core-file) -- [Custom Debugger Commands](#custom-debugger-commands) - - [startDebugging](#startDebugging) + - [Launch Configuration Settings](#launch-configuration-settings) + - [Attaching Settings](#attaching-settings) + - [Example configurations](#example-configurations) + - [Launching](#launching) + - [Attach using PID](#attach-using-pid) + - [Attach by Name](#attach-by-name) + - [Loading a Core File](#loading-a-core-file) +- [Custom debugger commands](#custom-debugger-commands) + - [startDebugging](#startdebugging) + - [repl-mode](#repl-mode) # Introduction @@ -24,52 +26,57 @@ get a full featured debugger with a well defined protocol. # Installation for Visual Studio Code -Installing the plug-in involves creating a directory in the `~/.vscode/extensions` folder and copying the package.json file that is in the same directory as this -documentation into it, and copying to symlinking a lldb-vscode binary into -the `bin` directory inside the plug-in directory. - -If you want to make a stand alone plug-in that you can send to others on unix systems: - -``` -$ mkdir -p ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp package.json ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 -$ cd ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp /path/to/a/built/lldb-vscode . -$ cp /path/to/a/built/liblldb.so . +Installing the plug-in involves creating a directory in any location outside of +`~/.vscode/extensions`. For example, `~/vscode-lldb` is a valid one. You'll also +need a subfolder `bin`, e.g. `~/vscode-lldb/bin`. Then copy the `package.json` +file that is in the same directory as this documentation into it, and symlink +the `lldb-vscode` binary into the `bin` directory inside the plug-in directory. + +Finally, on VS Code, execute the command +`Developer: Install Extension from Location` and pick the folder you just +created, which would be `~/vscode-lldb` following the example above. + +If you want to make a stand alone plug-in that you can send to others on UNIX +systems: + +```bash +mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin +cp package.json ~/llvm-org.lldb-vscode-0.1.0 +cd ~/llvm-org.lldb-vscode-0.1.0/bin +cp /path/to/a/built/lldb-vscode . 
+cp /path/to/a/built/liblldb.so . ``` -It is important to note that the directory `~/.vscode/extensions` works for users logged in locally to the machine. If you are remoting into the box using Visual Studio Code's Remote plugins (SSH, WSL, Docker) it will look for extensions on `~/.vscode-server/extensions` only and you will not see your just installed lldb-vscode plug-in. If you want this plugin to be visible to remoting users, you will need to either repeat the process above for the `~/.vscode-server` folder or create a symbolic link from it to `~/.vscode/extensions`: +If you want to make a stand alone plug-in that you can send to others on macOS +systems: -``` -$ cd ~/.vscode-server/extensions -$ ln -s ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 llvm-org.lldb-vscode-0.1.0 +```bash +mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin +cp package.json ~/llvm-org.lldb-vscode-0.1.0 +cd ~/llvm-org.lldb-vscode-0.1.0/bin +cp /path/to/a/built/lldb-vscode . +rsync -av /path/to/a/built/LLDB.framework LLDB.framework ``` -If you want to make a stand alone plug-in that you can send to others on macOS systems: - -``` -$ mkdir -p ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp package.json ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 -$ cd ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp /path/to/a/built/lldb-vscode . -$ rsync -av /path/to/a/built/LLDB.framework LLDB.framework +You might need to create additional directories for the `liblldb.so` or +`LLDB.framework` inside or next to the `bin` folder depending on how the +[rpath](https://en.wikipedia.org/wiki/Rpath) is set in your `lldb-vscode` +binary. By default the `Debug` builds of LLDB usually includes +the current executable directory in the rpath, so these steps should work for +most people. + +To create a plug-in that symlinks into your `lldb-vscode` in your build +directory: + +```bash +mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin +cp package.json ~/llvm-org.lldb-vscode-0.1.0 +cd ~/llvm-org.lldb-vscode-0.1.0/bin +ln -s /path/to/a/built/lldb-vscode ``` -You might need to create additional directories for the `liblldb.so` or `LLDB.framework` inside or next to the `bin` folder depending on how the [rpath](https://en.wikipedia.org/wiki/Rpath) is set in your `lldb-vscode` binary. By default the `Debug` builds of LLDB usually includes -the current executable directory in the rpath, so these steps should work for most people. - -To create a plug-in that symlinks into your `lldb-vscode` in your build directory: - -``` -$ mkdir -p ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ cp package.json ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0 -$ cd ~/.vscode/extensions/llvm-org.lldb-vscode-0.1.0/bin -$ ln -s /path/to/a/built/lldb-vscode -``` - -This is handy if you want to debug and develope the `lldb-vscode` executable when adding features or fixing bugs. - - +This is handy if you want to debug and develope the `lldb-vscode` executable +when adding features or fixing bugs. # Configurations @@ -127,7 +134,6 @@ The JSON configuration file can contain the following `lldb-vscode` specific lau |**terminateCommands** |[string]| | LLDB commands executed when the debugging session ends. Commands and command output will be sent to the debugger console when they are executed. |**attachCommands** |[string]| | LLDB commands that will be executed after **preRunCommands** which take place of the code that normally does the attach. The commands can create a new target and attach or launch it however desired. 
This allows custom launch and attach configurations. Core files can use `target create --core /path/to/core` to attach to core files. - ## Example configurations ### Launching @@ -191,7 +197,6 @@ to be launched you can add the "waitFor" key value pair: This will work as long as the architecture, vendor and OS supports waiting for processes. Currently MacOS is the only platform that supports this. - ### Loading a Core File This loads the coredump file `/cores/123.core` associated with the program @@ -242,12 +247,12 @@ This will launch a server and then request a child debug session for a client. Inspect or adjust the behavior of lldb-vscode repl evaluation requests. The supported modes are `variable`, `command` and `auto`. -* `variable` - Variable mode expressions are evaluated in the context of the +- `variable` - Variable mode expressions are evaluated in the context of the current frame. Use a `\`` prefix on the command to run an lldb command. -* `command` - Command mode expressions are evaluated as lldb commands, as a +- `command` - Command mode expressions are evaluated as lldb commands, as a result, values printed by lldb are always stringified representations of the expression output. -* `auto` - Auto mode will attempt to infer if the expression represents an lldb +- `auto` - Auto mode will attempt to infer if the expression represents an lldb command or a variable expression. A heuristic is used to infer if the input represents a variable or a command. Use a `\`` prefix to ensure an expression is evaluated as a command. ``````````
https://github.com/llvm/llvm-project/pull/68234 From lldb-commits at lists.llvm.org Wed Oct 4 11:08:02 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:08:02 -0700 (PDT) Subject: [Lldb-commits] =?utf-8?q?=5Blldb=5D_Add_the_ability_to_get_a_C++_?= =?utf-8?q?vtable_ValueObject_from_another_ValueObj=E2=80=A6_=28PR_=236759?= =?utf-8?q?9=29?= In-Reply-To: Message-ID: <651daa02.a70a0220.f2468.a3e5@mx.google.com> https://github.com/clayborg commented: I think all review comments have been taken into account. Anything else needed? https://github.com/llvm/llvm-project/pull/67599 From lldb-commits at lists.llvm.org Wed Oct 4 11:10:54 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:10:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651daaae.620a0220.f368d.a960@mx.google.com> https://github.com/clayborg approved this pull request. Looks good! https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Wed Oct 4 11:13:22 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Wed, 04 Oct 2023 11:13:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <651dab42.170a0220.7464b.b752@mx.google.com> bulbazord wrote: I have no problem with putting things form SymbolFileDWARF into its own namespace. Let's wait a bit though to see if anyone else has any opinions. https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Wed Oct 4 11:16:07 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 11:16:07 -0700 (PDT) Subject: [Lldb-commits] =?utf-8?q?=5Blldb=5D_Add_the_ability_to_get_a_C++_?= =?utf-8?q?vtable_ValueObject_from_another_ValueObj=E2=80=A6_=28PR_=236759?= =?utf-8?q?9=29?= In-Reply-To: Message-ID: <651dabe7.630a0220.39108.9fcf@mx.google.com> jeffreytan81 wrote: > > > > * Can we test this during multiple inheritance? Should we print multiple vtables? > > > > [Greg] I can add a test for this and make sure things works when dynamic typing is on and off. We won't print multiple vtables, as each class has only 1 vtable, it will just include all of the virtual methods needed for any inherited classes. > > > > > > I remember some compiler's multi-inheritance implementation is putting one vtable_ptr in object for each parent class. Maybe not in clang? > > Each class has a single vtable and this table will contain a copy of all vtables that are needed for each class. I was talking about https://shaharmike.com/cpp/vtable-part2/ which there can be multiple vtable_ptr(s) in object and multiple vtables. But I think you are right that we only care showing the final merged vtable from most derived child class not other vtables containing non-virtual thunk methods. https://github.com/llvm/llvm-project/pull/67599 From lldb-commits at lists.llvm.org Wed Oct 4 11:16:58 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:16:58 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651dac1a.170a0220.a5231.b973@mx.google.com> =?utf-8?q?Jos=C3=A9?= L. Junior Message-ID: In-Reply-To: https://github.com/clayborg commented: Just remove the includes that are no longer needed and this is good to go! 
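Returning to the multiple-inheritance question in the vtable thread (PR #67599) a few messages up: a small standalone program makes the "one vtable pointer per polymorphic base subobject" point concrete. Layout is ABI-specific — the comments describe the common Itanium (clang) case, not a guarantee:

```cpp
#include <iostream>

struct A { virtual ~A() = default; virtual void a() {} };
struct B { virtual ~B() = default; virtual void b() {} };
struct C : A, B {
  void a() override {}
  void b() override {}
};

int main() {
  C c;
  // With two polymorphic, non-virtual bases, C carries two vtable
  // pointers: one in its A subobject and one in its B subobject, so the
  // two converted base pointers differ by the B subobject's offset.
  std::cout << "A*: " << static_cast<A *>(&c) << '\n';
  std::cout << "B*: " << static_cast<B *>(&c) << '\n';
  // Typically 2 * sizeof(void *) on 64-bit Itanium — ABI-dependent.
  std::cout << "sizeof(C): " << sizeof(C) << '\n';
}
```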
https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Wed Oct 4 11:16:59 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:16:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651dac1b.050a0220.9b25c.a5d0@mx.google.com> José L. Junior Message-ID: In-Reply-To: ================ @@ -8,18 +8,21 @@ #include "CommandOptionsProcessLaunch.h" +#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandCompletions.h" #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" - #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" ---------------- clayborg wrote: we can remove this and the above includes now that we are doing the heavy work in Target.cpp https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Wed Oct 4 11:16:59 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:16:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) Message-ID: <651dac1b.170a0220.91293.bcf0@mx.google.com> José L. Junior Message-ID: In-Reply-To: https://github.com/clayborg edited https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Wed Oct 4 11:22:55 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:22:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <651dad7f.170a0220.bb82f.b786@mx.google.com> https://github.com/clayborg commented: I have no issues with putting this into a namespace either.
Just a question in my inline comment https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Wed Oct 4 11:22:55 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:22:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <651dad7f.170a0220.ad1f2.b83f@mx.google.com> https://github.com/clayborg edited https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Wed Oct 4 11:22:56 2023 From: lldb-commits at lists.llvm.org (Greg Clayton via lldb-commits) Date: Wed, 04 Oct 2023 11:22:56 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <651dad80.170a0220.91293.bcf0@mx.google.com> ================ @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf { ---------------- clayborg wrote: Do we do this anywhere else? Should this be split into: ``` namespace lldb_plugin { namespace dwarf { ``` Not sure if there is a llvm coding convention for this? https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Wed Oct 4 11:33:48 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 11:33:48 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <651db00c.620a0220.513cf.a012@mx.google.com> ================ @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf { ---------------- walter-erquinigo wrote: This is possible after the switch to c++17, and that's probably why no one else is using it in LLDB. There's no explicit llvm coding convention for this. I'm fine with splitting it into two lines if you want. 
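For reference on the exchange above: the C++17 nested namespace definition is pure syntactic sugar, so the two spellings are interchangeable. A compilable illustration (the function names are placeholders):

```cpp
#include <iostream>

// C++17 compact form — a single declaration opens both levels.
namespace lldb_plugin::dwarf {
int from_compact() { return 1; }
} // namespace lldb_plugin::dwarf

// Pre-C++17 equivalent — the two forms may even be mixed freely.
namespace lldb_plugin {
namespace dwarf {
int from_nested() { return 2; }
} // namespace dwarf
} // namespace lldb_plugin

int main() {
  // Both functions live in the same namespace, lldb_plugin::dwarf.
  std::cout << lldb_plugin::dwarf::from_compact() +
                   lldb_plugin::dwarf::from_nested()
            << '\n'; // prints 3
}
```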
https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Wed Oct 4 14:17:16 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Wed, 04 Oct 2023 14:17:16 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651dd65c.a70a0220.26c2b.b4c5@mx.google.com> https://github.com/junior-jl updated https://github.com/llvm/llvm-project/pull/67019 >From 6de148adcdd1eedea7e23b4e267c6f42bb68bc45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Tue, 3 Oct 2023 15:28:45 -0300 Subject: [PATCH 1/3] [lldb] add stop-at-user-entry option to process launch --- lldb/include/lldb/Target/Language.h | 4 ++ lldb/include/lldb/Target/Target.h | 2 + .../Commands/CommandOptionsProcessLaunch.cpp | 13 +++++- lldb/source/Commands/Options.td | 4 ++ .../Language/CPlusPlus/CPlusPlusLanguage.h | 2 + .../Plugins/Language/ObjC/ObjCLanguage.h | 2 + .../ObjCPlusPlus/ObjCPlusPlusLanguage.h | 2 + lldb/source/Target/Target.cpp | 42 +++++++++++++++++++ .../command-process-launch-user-entry.test | 8 ++++ 9 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 lldb/test/Shell/Commands/command-process-launch-user-entry.test diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index a6b9ccaf31b3c42..d53089ba4a59974 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -160,6 +160,10 @@ class Language : public PluginInterface { virtual lldb::LanguageType GetLanguageType() const = 0; + // Implement this function to return the user-defined entry point name + // for the language + virtual llvm::StringRef GetUserEntryPointName() const { return {}; } + virtual bool IsTopLevelFunction(Function &function); virtual bool IsSourceFile(llvm::StringRef file_path) const = 0; diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index e9e531d0e12640a..82a343ee03fb516 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -654,6 +654,8 @@ class Target : public std::enable_shared_from_this, lldb::BreakpointSP GetBreakpointByID(lldb::break_id_t break_id); + lldb::BreakpointSP CreateBreakpointAtUserEntry(); + // Use this to create a file and line breakpoint to a given module or all // module it is nullptr lldb::BreakpointSP CreateBreakpoint(const FileSpecList *containingModules, diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 85ad8ff5e07132c..3055e4ca45bd230 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -8,6 +8,7 @@ #include "CommandOptionsProcessLaunch.h" +#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" @@ -15,11 +16,13 @@ #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" - #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; using namespace lldb; @@ -38,7 +41,13 @@ Status CommandOptionsProcessLaunch::SetOptionValue( case 's': // Stop at program entry point 
launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; - + case 'm': // Stop at user entry point + { + TargetSP target_sp = + execution_context ? execution_context->GetTargetSP() : TargetSP(); + target_sp->CreateBreakpointAtUserEntry(); + break; + } case 'i': // STDIN for read only { FileAction action; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 04830b8b990efae..dd4cf5c4dc043e7 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -675,6 +675,10 @@ let Command = "platform shell" in { let Command = "process launch" in { def process_launch_stop_at_entry : Option<"stop-at-entry", "s">, Desc<"Stop at the entry point of the program when launching a process.">; + def process_launch_stop_at_user_entry : Option<"stop-at-user-entry", "m">, + Desc<"Stop at the user entry point when launching a process. For C based " + "languages this will be the 'main' function, but this might differ for " + "other languages.">; def process_launch_disable_aslr : Option<"disable-aslr", "A">, Arg<"Boolean">, Desc<"Set whether to disable address space layout randomization when launching a process.">; def process_launch_plugin : Option<"plugin", "P">, Arg<"Plugin">, diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 7712a60b7795951..623d481bf117f48 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -103,6 +103,8 @@ class CPlusPlusLanguage : public Language { return lldb::eLanguageTypeC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + std::unique_ptr GetTypeScavenger() override; lldb::TypeCategoryImplSP GetFormatters() override; diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index bb8057846bb7c30..a50f4b036108d7a 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -127,6 +127,8 @@ class ObjCLanguage : public Language { return lldb::eLanguageTypeObjC; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + // Get all possible names for a method. 
Examples: // If method_name is "+[NSString(my_additions) myStringWithCString:]" // variant_names[0] => "+[NSString myStringWithCString:]" diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h index b7c71b5dbb1c991..1beab9348eb72e8 100644 --- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h @@ -27,6 +27,8 @@ class ObjCPlusPlusLanguage : public Language { return lldb::eLanguageTypeObjC_plus_plus; } + llvm::StringRef GetUserEntryPointName() const override { return "main"; } + llvm::StringRef GetNilReferenceSummaryString() override { return "nil"; } bool IsSourceFile(llvm::StringRef file_path) const override; diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index f197b1b1aa097c3..013d72bda6308be 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -335,6 +335,48 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } +lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { + TargetSP target_sp = shared_from_this(); + Status error; + ModuleSP main_module_sp = target_sp->GetExecutableModule(); + FileSpecList shared_lib_filter; + shared_lib_filter.Append(main_module_sp->GetFileSpec()); + llvm::SetVector, + std::unordered_set> + entryPointNamesSet; + for (LanguageType lang_type : Language::GetSupportedLanguages()) { + Language *lang = Language::FindPlugin(lang_type); + if (!lang) { + error.SetErrorString("Language not found\n"); + return lldb::BreakpointSP(); + } + std::string entryPointName = lang->GetUserEntryPointName().str(); + if (!entryPointName.empty()) + entryPointNamesSet.insert(entryPointName); + } + if (entryPointNamesSet.empty()) { + error.SetErrorString("No entry point name found\n"); + return lldb::BreakpointSP(); + } + BreakpointSP bp_sp = + target_sp->CreateBreakpoint(&shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware + ); + if (!bp_sp) { + error.SetErrorString("Breakpoint creation failed.\n"); + return lldb::BreakpointSP(); + } + bp_sp->SetOneShot(true); + return bp_sp; +} + BreakpointSP Target::CreateSourceRegexBreakpoint( const FileSpecList *containingModules, const FileSpecList *source_file_spec_list, diff --git a/lldb/test/Shell/Commands/command-process-launch-user-entry.test b/lldb/test/Shell/Commands/command-process-launch-user-entry.test new file mode 100644 index 000000000000000..32ef710fe567439 --- /dev/null +++ b/lldb/test/Shell/Commands/command-process-launch-user-entry.test @@ -0,0 +1,8 @@ +# RUN: %clang_host -g %S/Inputs/main.c -o %t +# RUN: %lldb %t -s %s -o exit | FileCheck %s + +process launch -m +# CHECK-LABEL: process launch -m +# CHECK: Process {{.*}} stopped +# CHECK: stop reason = one-shot breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c \ No newline at end of file >From 8e0151842db6d261535f26e48a9194e9d90830fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Tue, 3 Oct 2023 18:08:39 -0300 Subject: [PATCH 2/3] refactor CreateBreakpointAtUserEntry method --- lldb/include/lldb/Target/Target.h | 2 +- .../Commands/CommandOptionsProcessLaunch.cpp | 10 +++----- lldb/source/Target/Target.cpp | 25 +++++++++---------- 3 files changed, 16 insertions(+), 21 
deletions(-) diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 82a343ee03fb516..8752b42a9518983 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -654,7 +654,7 @@ class Target : public std::enable_shared_from_this, lldb::BreakpointSP GetBreakpointByID(lldb::break_id_t break_id); - lldb::BreakpointSP CreateBreakpointAtUserEntry(); + lldb::BreakpointSP CreateBreakpointAtUserEntry(Status &error); // Use this to create a file and line breakpoint to a given module or all // module it is nullptr diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index 3055e4ca45bd230..e3a9cc8aca2356e 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -37,17 +37,15 @@ Status CommandOptionsProcessLaunch::SetOptionValue( Status error; const int short_option = g_process_launch_options[option_idx].short_option; + TargetSP target_sp = + execution_context ? execution_context->GetTargetSP() : TargetSP(); switch (short_option) { case 's': // Stop at program entry point launch_info.GetFlags().Set(eLaunchFlagStopAtEntry); break; case 'm': // Stop at user entry point - { - TargetSP target_sp = - execution_context ? execution_context->GetTargetSP() : TargetSP(); - target_sp->CreateBreakpointAtUserEntry(); + target_sp->CreateBreakpointAtUserEntry(error); break; - } case 'i': // STDIN for read only { FileAction action; @@ -98,8 +96,6 @@ Status CommandOptionsProcessLaunch::SetOptionValue( break; case 'a': { - TargetSP target_sp = - execution_context ? execution_context->GetTargetSP() : TargetSP(); PlatformSP platform_sp = target_sp ? target_sp->GetPlatform() : PlatformSP(); launch_info.GetArchitecture() = diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 013d72bda6308be..eaf9e51a7e56a90 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -335,10 +335,9 @@ BreakpointSP Target::GetBreakpointByID(break_id_t break_id) { return bp_sp; } -lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { - TargetSP target_sp = shared_from_this(); - Status error; - ModuleSP main_module_sp = target_sp->GetExecutableModule(); +lldb::BreakpointSP +lldb_private::Target::CreateBreakpointAtUserEntry(Status &error) { + ModuleSP main_module_sp = GetExecutableModule(); FileSpecList shared_lib_filter; shared_lib_filter.Append(main_module_sp->GetFileSpec()); llvm::SetVector, @@ -359,15 +358,15 @@ lldb::BreakpointSP lldb_private::Target::CreateBreakpointAtUserEntry() { return lldb::BreakpointSP(); } BreakpointSP bp_sp = - target_sp->CreateBreakpoint(&shared_lib_filter, - nullptr, // containingSourceFiles - entryPointNamesSet.takeVector(), - eFunctionNameTypeFull, // func_name_type_mask - eLanguageTypeUnknown, // language - 0, // offset - eLazyBoolNo, // skip_prologue - false, // internal - false // hardware + CreateBreakpoint(&shared_lib_filter, + nullptr, // containingSourceFiles + entryPointNamesSet.takeVector(), + eFunctionNameTypeFull, // func_name_type_mask + eLanguageTypeUnknown, // language + 0, // offset + eLazyBoolNo, // skip_prologue + false, // internal + false // hardware ); if (!bp_sp) { error.SetErrorString("Breakpoint creation failed.\n"); >From 62edff5495e5a3e7e399a93f47ab4bc3d07ecdf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Junior?= Date: Wed, 4 Oct 2023 18:16:59 -0300 Subject: [PATCH 3/3] remove unecessary includes --- 
lldb/source/Commands/CommandOptionsProcessLaunch.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index e3a9cc8aca2356e..b1c13d4df79e0ee 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -8,7 +8,6 @@ #include "CommandOptionsProcessLaunch.h" -#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" @@ -16,13 +15,11 @@ #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" -#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" -#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" + #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; using namespace lldb; From lldb-commits at lists.llvm.org Wed Oct 4 14:17:26 2023 From: lldb-commits at lists.llvm.org (=?UTF-8?Q?Jos=C3=A9_Lira_Junior?= via lldb-commits) Date: Wed, 04 Oct 2023 14:17:26 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] add stop-at-user-entry option to process launch (PR #67019) In-Reply-To: Message-ID: <651dd666.a70a0220.9b00b.b8e4@mx.google.com> ================ @@ -8,18 +8,21 @@ #include "CommandOptionsProcessLaunch.h" +#include "lldb/Core/Module.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandCompletions.h" #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" - #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" ---------------- junior-jl wrote: Done! https://github.com/llvm/llvm-project/pull/67019 From lldb-commits at lists.llvm.org Wed Oct 4 14:31:31 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 14:31:31 -0700 (PDT) Subject: [Lldb-commits] =?utf-8?q?=5Blldb=5D_Add_the_ability_to_get_a_C++_?= =?utf-8?q?vtable_ValueObject_from_another_ValueObj=E2=80=A6_=28PR_=236759?= =?utf-8?q?9=29?= In-Reply-To: Message-ID: <651dd9b3.170a0220.e8843.07f8@mx.google.com> jimingham wrote: The other bit that we mentioned in the course of this review was showing invalid vtables. You still have the code that just makes an error in the child creation if you can't look up the value that a vtable slot is pointing to. It would be better to make the child and report that it looks bogus somehow. https://github.com/llvm/llvm-project/pull/67599 From lldb-commits at lists.llvm.org Wed Oct 4 14:43:02 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Wed, 04 Oct 2023 14:43:02 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651ddc66.170a0220.fc642.01d2@mx.google.com> bulbazord wrote: Personally I have no qualms about this. It's just an option that lets you choose which symbols to export. We already allow exporting every symbol, why not just some of them? 
I would be careful though; since we don't guarantee any stability for lldb_private (including things from plugins), relying on this could come back to bite you. https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Wed Oct 4 15:01:34 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 15:01:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651de0be.170a0220.8802c.0248@mx.google.com> jimingham wrote: I think we need to make it clear, wherever in the build system the knob for turning on these exports lives, that you use these symbols at your own risk, and that we guarantee NO ABI stability for anything but the SB APIs. We know that on this list, but somebody getting the sources might think this was a supported configuration and be sad further down the line. Jim > On Oct 4, 2023, at 2:43 PM, Alex Langford ***@***.***> wrote: > > > Personally I have no qualms about this. It's just an option that lets you choose which symbols to export. We already allow exporting every symbol, why not just some of them? I would be careful though; since we don't guarantee any stability for lldb_private (including things from plugins), relying on this could come back to bite you. > > — > Reply to this email directly, view it on GitHub, or unsubscribe. > You are receiving this because you are on a team that was mentioned. > https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Wed Oct 4 15:03:22 2023 From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits) Date: Wed, 04 Oct 2023 15:03:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651de12a.170a0220.3139f.01f7@mx.google.com> bulbazord wrote: > I think we need to make it clear, wherever in the build system the knob for turning on these exports lives, that you use these symbols at your own risk, and that we guarantee NO ABI stability for anything but the SB APIs. We know that on this list, but somebody getting the sources might think this was a supported configuration and be sad further down the line. Jim In that case, maybe we should emit a CMake warning instead of just an info message? https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Wed Oct 4 15:30:01 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Wed, 04 Oct 2023 15:30:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651de769.170a0220.1f2ff.0a5c@mx.google.com> jimingham wrote: That sounds appropriate to me. Jim > On Oct 4, 2023, at 3:03 PM, Alex Langford ***@***.***> wrote: > > > I think we need to make it clear, wherever in the build system the knob for turning on these exports lives, that you use these symbols at your own risk, and that we guarantee NO ABI stability for anything but the SB APIs. We know that on this list, but somebody getting the sources might think this was a supported configuration and be sad further down the line. Jim > > In that case, maybe we should emit a CMake warning instead of just an info message? > > — > Reply to this email directly, view it on GitHub, or unsubscribe. > You are receiving this because you are on a team that was mentioned.
> https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Wed Oct 4 16:37:23 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 16:37:23 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651df733.170a0220.16f87.0745@mx.google.com> walter-erquinigo wrote: I'll add the warning as you guys mention. That'll set clear expectations on what users will be getting. Besides that, @bulbazord , the current symbols getting exported by `third-party/llvm-project/lldb/source/API/liblldb-private.exports` are not all the symbols, but just some. Some folks like me need access to symbols from specific plugins that don't use the lldb_private namespace, so being able to replace the list that the file mentioned above exports is fundamental. https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Wed Oct 4 18:09:26 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 18:09:26 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651e0cc6.a70a0220.7ff03.0f92@mx.google.com> https://github.com/walter-erquinigo updated https://github.com/llvm/llvm-project/pull/68013 >From d557ea59b9c24387244280566260c33ac6bb9367 Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Mon, 2 Oct 2023 13:56:00 -0400 Subject: [PATCH] [LLDB] Allow specifying a custom exports file LLDB has the cmake flag `LLDB_EXPORT_ALL_SYMBOLS` that exports the lldb, lldb_private namespaces, as well as other symbols like python and lua (see `third-party/llvm-project/lldb/source/API/liblldb-private.exports`). However, not all symbols in lldb fall into these categories and in order to get access to some symbols that live in plugin folders (like dwarf parsing symbols), it's useful to be able to specify a custom exports file giving more control to the developer using lldb as a library. This adds the new cmake flag `LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE` that is used when `LLDB_EXPORT_ALL_SYMBOLS` is enabled to specify that custom exports file. This is a follow up of https://github.com/llvm/llvm-project/pull/67851 --- lldb/cmake/modules/LLDBConfig.cmake | 3 +++ lldb/source/API/CMakeLists.txt | 13 ++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 380016ce48015fa..264eed1ad82012f 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -125,6 +125,9 @@ endif() set(LLDB_EXPORT_ALL_SYMBOLS 0 CACHE BOOL "Causes lldb to export all symbols when building liblldb.") +set(LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE "" CACHE PATH + "When `LLDB_EXPORT_ALL_SYMBOLS` is enabled, this specifies the exports file to use when building liblldb.") + if ((NOT MSVC) OR MSVC12) add_definitions( -DHAVE_ROUND ) endif() diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..a574a461d4920ae 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -177,11 +177,18 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") # from working on some systems but limits the liblldb size. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb namespace") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb.exports) - else() - # Don't use an explicit export. 
Instead, tell the linker to - # export all symbols. + elseif (NOT LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE) + # Don't use an explicit export. Instead, tell the linker to export all symbols. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb and lldb_private namespaces") + MESSAGE(WARNING "Private LLDB symbols frequently change and no API stability is guaranteed. " + "Only the SB API is guaranteed to be stable.") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb-private.exports) + else () + MESSAGE("-- Symbols (liblldb): exporting all symbols specified in the exports " + " file '${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}'") + MESSAGE(WARNING "Private LLDB symbols frequently change and no API stability is guaranteed. " + "Only the SB API is guaranteed to be stable.") + add_llvm_symbol_exports(liblldb "${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}") endif() set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc") elseif (LLDB_EXPORT_ALL_SYMBOLS) From lldb-commits at lists.llvm.org Wed Oct 4 18:11:56 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 18:11:56 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651e0d5c.620a0220.4affa.0dc6@mx.google.com> https://github.com/walter-erquinigo edited https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Wed Oct 4 18:13:18 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Wed, 04 Oct 2023 18:13:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651e0dae.a70a0220.75c31.0dbd@mx.google.com> https://github.com/walter-erquinigo updated https://github.com/llvm/llvm-project/pull/68013 >From a472a16e6032ce0cef0acae6957f690f7e6cc4a3 Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Mon, 2 Oct 2023 13:56:00 -0400 Subject: [PATCH] [LLDB] Allow specifying a custom exports file LLDB has the cmake flag `LLDB_EXPORT_ALL_SYMBOLS` that exports the lldb, lldb_private namespaces, as well as other symbols like python and lua (see `third-party/llvm-project/lldb/source/API/liblldb-private.exports`). However, not all symbols in lldb fall into these categories and in order to get access to some symbols that live in plugin folders (like dwarf parsing symbols), it's useful to be able to specify a custom exports file giving more control to the developer using lldb as a library. This adds the new cmake flag `LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE` that is used when `LLDB_EXPORT_ALL_SYMBOLS` is enabled to specify that custom exports file. This is a follow up of https://github.com/llvm/llvm-project/pull/67851 --- lldb/cmake/modules/LLDBConfig.cmake | 5 ++++- lldb/source/API/CMakeLists.txt | 13 ++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 380016ce48015fa..ce5e666a6f5e1ac 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -123,7 +123,10 @@ if(APPLE AND CMAKE_GENERATOR STREQUAL Xcode) endif() set(LLDB_EXPORT_ALL_SYMBOLS 0 CACHE BOOL - "Causes lldb to export all symbols when building liblldb.") + "Causes lldb to export some private symbols when building liblldb. 
See lldb/source/API/liblldb-private.exports for the full list of symbols that get exported.") + +set(LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE "" CACHE PATH + "When `LLDB_EXPORT_ALL_SYMBOLS` is enabled, this specifies the exports file to use when building liblldb.") if ((NOT MSVC) OR MSVC12) add_definitions( -DHAVE_ROUND ) diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..a574a461d4920ae 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -177,11 +177,18 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") # from working on some systems but limits the liblldb size. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb namespace") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb.exports) - else() - # Don't use an explicit export. Instead, tell the linker to - # export all symbols. + elseif (NOT LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE) + # Don't use an explicit export. Instead, tell the linker to export all symbols. MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb and lldb_private namespaces") + MESSAGE(WARNING "Private LLDB symbols frequently change and no API stability is guaranteed. " + "Only the SB API is guaranteed to be stable.") add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb-private.exports) + else () + MESSAGE("-- Symbols (liblldb): exporting all symbols specified in the exports " + " file '${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}'") + MESSAGE(WARNING "Private LLDB symbols frequently change and no API stability is guaranteed. " + "Only the SB API is guaranteed to be stable.") + add_llvm_symbol_exports(liblldb "${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}") endif() set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc") elseif (LLDB_EXPORT_ALL_SYMBOLS) From lldb-commits at lists.llvm.org Thu Oct 5 00:41:05 2023 From: lldb-commits at lists.llvm.org (Pavel Kosov via lldb-commits) Date: Thu, 05 Oct 2023 00:41:05 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Add support for changing char in Scalar::SetValueFromCString (PR #67784) In-Reply-To: Message-ID: <651e6891.170a0220.959eb.2d1d@mx.google.com> kpdev wrote: > This seems like a somewhat limited way to poke a character into the value if the string has more than one character already in it. > > If you are trying to do more fancy setting of the contents of an SBValue, then it would be more straightforward to get the SBData for the value with GetData, then you have access to the actual bytes in the data, and you can poke in values wherever you want. I think that might be a better approach than trying to get SetValueFromCString to handle changing single character ValueObjects. The main purpose of all these patches is to be able to update a string value during a debug session in an IDE (VS Code, for example). LLDB's communication with the IDE is not direct; it goes through external tools. E.g., when we want to change a value in VS Code, VS Code first sends a request to lldb-mi through the `Debug Adapter Protocol`, then lldb-mi asks LLDB to change the received value through the `SetValueFromCString` API. So if we would like to avoid using this API, we need to add such support on the lldb-mi side.
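(For concreteness, the client side of that flow is a single DAP `setVariable` request. A sketch of what a client like VS Code sends, per the DAP specification; the `seq` and `variablesReference` values here are made up:

```
{
  "type": "request",
  "seq": 12,
  "command": "setVariable",
  "arguments": {
    "variablesReference": 1001,
    "name": "str",
    "value": "\"hello world\""
  }
}
```

Whichever adapter receives this, lldb-mi in the setup described here, has to translate it into some LLDB call, which is why `SetValueFromCString` keeps coming up.)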
But it is not necessary that the IDE communicates with lldb-mi; it can send requests to any tool which supports DAP, and that tool will probably use the `SetValueFromCString` API. https://github.com/llvm/llvm-project/pull/67784 From lldb-commits at lists.llvm.org Thu Oct 5 01:00:28 2023 From: lldb-commits at lists.llvm.org (Pavel Kosov via lldb-commits) Date: Thu, 05 Oct 2023 01:00:28 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Add support for updating string during debug process (PR #67782) In-Reply-To: Message-ID: <651e6d1c.a70a0220.6c643.2429@mx.google.com> kpdev wrote: > What is it about this change that is defeating the ValueObject printer from compressing this output onto one line? It looks like the contents that get printed are the same, so there's something about switching from a Summary provider to a child provider that's causing problems. We should fix that as people are really picky about variable printing being as space efficient as possible. > To clear up terminology... Strings had data formatters before AND after this change. The difference is that you've switched from a "Summary Provider" data formatter to a "Synthetic Child Provider" data formatter. > > It looks like you've made the printing of std::strings less space efficient. That shouldn't be necessary, and isn't desirable. We should figure out why that's happening and fix that before this change is going to not cause complaints. As mentioned in a comment in `FormatManager::ShouldPrintAsOneLiner` ( https://github.com/llvm/llvm-project/blob/main/lldb/source/DataFormatters/FormatManager.cpp#L498 ): ``` // if we decided to define synthetic children for a type, we probably care // enough to show them, but avoid nesting children in children ``` So, there is a condition for that: ``` // but if we only have them to provide a value, keep going if (!synth_sp->MightHaveChildren() && synth_sp->DoesProvideSyntheticValue()) is_synth_val = true; else return false; ``` This patch adds a StringSynthetic, and that synthetic's `MightHaveChildren()` is `true`, therefore `ShouldPrintAsOneLiner` returns `false`. And the printer will use `"()"` or `"{\n ... \n}"` according to whether we return `true` or `false` from this function. If we would like to avoid the unnecessary output, we could probably ask the SyntheticFrontend directly whether it wants to print expanded info or not. What do you think about it? https://github.com/llvm/llvm-project/pull/67782 From lldb-commits at lists.llvm.org Thu Oct 5 01:05:53 2023 From: lldb-commits at lists.llvm.org (Pavel Kosov via lldb-commits) Date: Thu, 05 Oct 2023 01:05:53 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Add support for updating string during debug process (PR #67782) In-Reply-To: Message-ID: <651e6e61.630a0220.add0c.280c@mx.google.com> kpdev wrote: > BTW, I have no problem with the general direction of this change. It makes a lot more sense to ask a synthetic child provider to change a value - since it does represent the value of the ValueObject - rather than the summary which is just some free-form text. And being able to change characters in a string seems a reasonable thing to do, so switching the std::string comprehension from a Summary provider to a Synthetic Child Provider is the way to do that. So that part is fine. > > But std::strings abound in code, and so if we're going to make this change I don't think we can make that printing less space efficient, which this change seems to have done. We should figure out why that's the case and fix it for this to be a really good change.
So, is it OK to use the current `SetValueFromCString` API from ValueObject to ask the synthetic provider to update the underlying string? You mentioned previously that we may add a `SetSummaryFromCString` API; in fact, that is what I am currently doing: changing the whole string through its summary (please check the attached gif for an example). But the problem with the new API is the same as for changing characters through `SBValue::GetData`: the IDE doesn't support it. https://github.com/llvm/llvm-project/pull/67782 From lldb-commits at lists.llvm.org Thu Oct 5 01:35:24 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Thu, 05 Oct 2023 01:35:24 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb-vscode] Update installation instructions (PR #68234) In-Reply-To: Message-ID: <651e754c.a70a0220.f24e5.279c@mx.google.com> DavidSpickett wrote: Some general comments but otherwise I followed the instructions myself and it worked fine. I'll let @clayborg give the final ok, since I am new to this stuff. https://github.com/llvm/llvm-project/pull/68234 From lldb-commits at lists.llvm.org Thu Oct 5 02:49:46 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 02:49:46 -0700 (PDT) Subject: [Lldb-commits] [lldb] 3a35ca0 - [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (#68231) Message-ID: <651e86ba.a70a0220.aae49.2a37@mx.google.com> Author: Michael Buch Date: 2023-10-05T10:49:42+01:00 New Revision: 3a35ca01fc55f27315d1652ec1dedff10e79918b URL: https://github.com/llvm/llvm-project/commit/3a35ca01fc55f27315d1652ec1dedff10e79918b DIFF: https://github.com/llvm/llvm-project/commit/3a35ca01fc55f27315d1652ec1dedff10e79918b.diff LOG: [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (#68231) Currently this non-trivial calculation is repeated multiple times, making it hard to reason about when the `byte_offset`/`member_byte_offset` is being set or not. This patch simply moves all those instances of the same calculation into a helper function. We return an optional to remain an NFC patch. Default initializing the offset would make sense but requires further analysis and can be done in a follow-up patch. Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..005711d6f488c7f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -519,6 +519,33 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +static std::optional +ExtractDataMemberLocation(DWARFDIE const &die, DWARFFormValue const &form_value, + ModuleSP module_sp) { + // With DWARF 3 and later, if the value is an integer constant, + // this form value is the offset in bytes from the beginning of + // the containing entity.
+ if (!form_value.BlockData()) + return form_value.Unsigned(); + + Value initialValue(0); + Value memberOffset(0); + const DWARFDataExtractor &debug_info_data = die.GetData(); + uint32_t block_length = form_value.Unsigned(); + uint32_t block_offset = + form_value.BlockData() - debug_info_data.GetDataStart(); + if (!DWARFExpression::Evaluate( + nullptr, // ExecutionContext * + nullptr, // RegisterContext * + module_sp, DataExtractor(debug_info_data, block_offset, block_length), + die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, memberOffset, + nullptr)) { + return {}; + } + + return memberOffset.ResolveValue(nullptr).UInt(); +} + lldb::TypeSP DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const DWARFDIE &die, @@ -1406,26 +1433,9 @@ void DWARFASTParserClang::ParseInheritance( encoding_form = form_value; break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, nullptr, module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - member_byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + member_byte_offset = *maybe_offset; break; case DW_AT_accessibility: @@ -2557,29 +2567,9 @@ VariantMember::VariantMember(DWARFDIE &die, lldb::ModuleSP module_sp) { break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, - block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. 
- byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + byte_offset = *maybe_offset; break; default: @@ -2608,28 +2598,9 @@ DiscriminantValue::DiscriminantValue(const DWARFDIE &die, ModuleSP module_sp) { type_ref = form_value; break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + byte_offset = *maybe_offset; break; default: break; @@ -2686,28 +2657,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, data_bit_offset = form_value.Unsigned(); break; case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - member_byte_offset = form_value.Unsigned(); - } + if (auto maybe_offset = + ExtractDataMemberLocation(die, form_value, module_sp)) + member_byte_offset = *maybe_offset; break; case DW_AT_accessibility: From lldb-commits at lists.llvm.org Thu Oct 5 02:49:48 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 02:49:48 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFCI] Extract DW_AT_data_member_location calculation logic (PR #68231) In-Reply-To: Message-ID: <651e86bc.050a0220.203239.2c2d@mx.google.com> https://github.com/Michael137 closed https://github.com/llvm/llvm-project/pull/68231 From lldb-commits at lists.llvm.org Thu Oct 5 04:03:02 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Thu, 05 Oct 2023 04:03:02 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651e97e6.a70a0220.11d57.2e29@mx.google.com> ================ @@ -340,6 +340,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. 
It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions. +Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { ---------------- kiranchandramohan wrote: Nit: braces to match else. Same for tid below. https://github.com/llvm/llvm-project/pull/67723 From lldb-commits at lists.llvm.org Thu Oct 5 04:03:11 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Thu, 05 Oct 2023 04:03:11 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651e97ef.170a0220.f46ec.380b@mx.google.com> ================ @@ -5748,6 +5758,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry"); Builder.SetInsertPoint(BodyBB, BodyBB->begin()); } + InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin()); ---------------- kiranchandramohan wrote: Can this be defined close to its use? https://github.com/llvm/llvm-project/pull/67723 From lldb-commits at lists.llvm.org Thu Oct 5 04:03:14 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Thu, 05 Oct 2023 04:03:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651e97f2.170a0220.88b64.369c@mx.google.com> https://github.com/kiranchandramohan edited https://github.com/llvm/llvm-project/pull/67723 From lldb-commits at lists.llvm.org Thu Oct 5 04:03:18 2023 From: lldb-commits at lists.llvm.org (Kiran Chandramohan via lldb-commits) Date: Thu, 05 Oct 2023 04:03:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651e97f6.170a0220.6be75.3566@mx.google.com> ================ @@ -340,6 +340,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions. +Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { + FakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); + ToBeDeleted.push(FakeVal); ---------------- kiranchandramohan wrote: Would this delete twice for the `AsPtr` case? 
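(To make the pattern being reviewed concrete, here is a minimal standalone sketch of the fake-value lifecycle; it is illustrative only, assumes `EntryBB` and the usual LLVM headers, and is not the patch's exact code:

```cpp
#include "llvm/IR/IRBuilder.h"
#include <stack>

// Minimal sketch of the fake-value trick used around outlining.
void fakeUsePattern(llvm::BasicBlock *EntryBB) {
  llvm::IRBuilder<> Builder(EntryBB);
  std::stack<llvm::Instruction *> ToBeDeleted;

  // Placeholder storage plus a load of it; the load is the "fake value"
  // that makes the outliner treat the future argument as an input.
  llvm::AllocaInst *FakeAddr =
      Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "tid.addr");
  ToBeDeleted.push(FakeAddr);
  llvm::Instruction *FakeVal =
      Builder.CreateLoad(Builder.getInt32Ty(), FakeAddr, "tid.val");
  ToBeDeleted.push(FakeVal); // each instruction is pushed exactly once

  // ... outlining would run here and rewrite the uses of FakeVal ...

  // LIFO deletion: the load (the user) is erased before the alloca it
  // reads from, and nothing is erased twice.
  while (!ToBeDeleted.empty()) {
    ToBeDeleted.top()->eraseFromParent();
    ToBeDeleted.pop();
  }
}
```

The LIFO ordering is also why the double-delete question above matters: pushing the same instruction twice would make the second `eraseFromParent()` touch an already-freed instruction.)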
https://github.com/llvm/llvm-project/pull/67723 From lldb-commits at lists.llvm.org Thu Oct 5 04:14:57 2023 From: lldb-commits at lists.llvm.org (Jay Foad via lldb-commits) Date: Thu, 05 Oct 2023 04:14:57 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651e9ab1.170a0220.de92e.3758@mx.google.com> jayfoad wrote: I've taken another look at this. The patch does not show any benefit from running another `SIFoldOperands` pass _after_ `SIShrinkInstructions` per se; you get exactly the same results (modulo a couple of add instructions that have their operands commuted differently) if you put the second `SIFoldOperands` run _before_ `SIShrinkInstructions` instead. In other words `SIFoldOperands` is not idempotent, and the reason for that seems to be: > And the reason it only happens for some SUBREV instructions is even more convoluted. It's because SIFoldOperands will sometimes shrink V_SUB_CO_U32_e64 to V_SUBREV_CO_U32_e32 even if it does not manage to fold anything into it. This does seem wrong and is probably worth a closer look. This goes back to https://reviews.llvm.org/D51345. Notice how the code that was added to `updateOperand` does the shrinking but does not actually do any folding; it returns before we get to `Old.ChangeToImmediate`/`Old.substVirtReg`. A second run of `SIFoldOperands` will see the shrunk instruction and fold into it. https://github.com/llvm/llvm-project/pull/67878 From lldb-commits at lists.llvm.org Thu Oct 5 04:28:59 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Thu, 05 Oct 2023 04:28:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651e9dfb.170a0220.ea711.377f@mx.google.com> dtcxzyw wrote: Ping. https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Thu Oct 5 04:38:34 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 04:38:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) Message-ID: https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/68300 **Background** Prior to DWARFv4, there was no clear normative text on how to handle static data members. Non-normative text suggested that compilers should use `DW_AT_external` to mark static data members of structures/unions. Clang does this consistently. However, GCC doesn't, e.g., when the structure/union is in an anonymous namespace (which is C++ standard conformant). Additionally, GCC never emits `DW_AT_data_member_location`s for union members (regardless of storage linkage and storage duration). Since DWARFv5 (issue 161118.1), static data members get emitted as `DW_TAG_variable`.
The combination of these changes then meant that LLDB would never correctly detect that a union has static data members. **Solution** Instead of unconditionally initializing the `member_byte_offset` to `0` specifically for union members, this patch proposes to check for both the absence of `DW_AT_data_member_location` and `DW_AT_declaration`, which consistently gets emitted for static data members on GCC and Clang. We initialize the `member_byte_offset` to `0` anyway if we determine it wasn't a static. So removing the special case for unions makes this code simpler to reason about. Long-term, we should just use DWARFv5's new representation for static data members. Fixes #68135 >From 30ef50b808a8458a60bbd3cdc52b866ee296b6ba Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Thu, 5 Oct 2023 12:13:12 +0100 Subject: [PATCH] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members **Background** Prior to DWARFv4, there was no clear normative text on how to handle static data members. Non-normative text suggested we compilers should use `DW_AT_external` to mark static data members of structrues/unions. Clang does this consistently. However, GCC doesn't, e.g., when the structure/union is in an anonymous namespace (which is C++ standard conformant). Additionally, GCC never emits `DW_AT_data_member_location`s for union members (regardless of storage linkage and storage duration). Since DWARFv5 (issue 161118.1), static data members get emitted as `DW_TAG_variable`. LLDB used to differentiate between static and non-static members by checking the `DW_AT_external` flag and the absence of `DW_AT_data_member_location`. With D18008 LLDB started to pretend that union members always have a `0` `DW_AT_data_member_location` by default (because GCC never emits these locations). In D124409 LLDB stopped checking the `DW_AT_external` flag to account for the case where GCC doesn't emit the flag for types in anonymous namespaces; instead we only check for presence of `DW_AT_data_member_location`s. The combination of these changes then meant that LLDB would never correctly detect that a union has static data members. **Solution** Instead of unconditionally initializing the `member_byte_offset` to `0` specifically for union members, this patch proposes to check for both the absence of `DW_AT_data_member_location` and `DW_AT_declaration`, which consistently gets emitted for static data members on GCC and Clang. We initialize the `member_byte_offset` to `0` anyway if we determine it wasn't a static. So removing the special case for unions makes this code simpler to reason about. Long-term, we should just use DWARFv5's new representation for static data members. 
Fixes #68135 --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 22 +++++++--- .../cpp/union-static-data-members/Makefile | 3 ++ .../TestCppUnionStaticMembers.py | 43 +++++++++++++++++++ .../cpp/union-static-data-members/main.cpp | 25 +++++++++++ 4 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 lldb/test/API/lang/cpp/union-static-data-members/Makefile create mode 100644 lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py create mode 100644 lldb/test/API/lang/cpp/union-static-data-members/main.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..ee35a7de80c1e18 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2482,8 +2482,9 @@ struct MemberAttributes { DWARFFormValue encoding_form; /// Indicates the byte offset of the word from the base address of the /// structure. - uint32_t member_byte_offset; + uint32_t member_byte_offset = UINT32_MAX; bool is_artificial = false; + bool is_declaration = false; }; /// Parsed form of all attributes that are relevant for parsing Objective-C @@ -2656,8 +2657,6 @@ DiscriminantValue &VariantPart::discriminant() { return this->_discriminant; } MemberAttributes::MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die, ModuleSP module_sp) { - member_byte_offset = (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX; - DWARFAttributes attributes = die.GetAttributes(); for (size_t i = 0; i < attributes.Size(); ++i) { const dw_attr_t attr = attributes.AttributeAtIndex(i); @@ -2717,6 +2716,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, case DW_AT_artificial: is_artificial = form_value.Boolean(); break; + case DW_AT_declaration: + is_declaration = form_value.Boolean(); + break; default: break; } @@ -2923,10 +2925,18 @@ void DWARFASTParserClang::ParseSingleMember( if (class_is_objc_object_or_interface) attrs.accessibility = eAccessNone; - // Handle static members, which is any member that doesn't have a bit or a - // byte member offset. + // Handle static members, which are typically members without + // locations. However, GCC *never* emits DW_AT_data_member_location + // for static data members of unions. + // Non-normative text pre-DWARFv5 recommends marking static + // data members with an DW_AT_external flag. Clang emits this consistently + // whereas GCC emits it only for static data members if not part of an + // anonymous namespace. The flag that is consistently emitted for static + // data members is DW_AT_declaration, so we check it instead. + // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we can + // consistently detect them on both GCC and Clang without below heuristic. 
if (attrs.member_byte_offset == UINT32_MAX && - attrs.data_bit_offset == UINT64_MAX) { + attrs.data_bit_offset == UINT64_MAX && attrs.is_declaration) { Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference()); if (var_type) { diff --git a/lldb/test/API/lang/cpp/union-static-data-members/Makefile b/lldb/test/API/lang/cpp/union-static-data-members/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py new file mode 100644 index 000000000000000..47166636b12647c --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -0,0 +1,43 @@ +""" +Tests that frame variable and expr work for +C++ unions and their static data members. +""" +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil + +class CppUnionStaticMembersTestCase(TestBase): + def test(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + + self.expect("frame variable foo", substrs=["val = 42"]) + self.expect("frame variable bar", substrs=["val = 137"]) + + self.expect_expr("foo", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + self.expect_expr("bar", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) + + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") + self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + + @expectedFailureAll + def test_union_in_anon_namespace(self): + """Tests that frame variable and expr work + for union static data members in anonymous + namespaces""" + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") + self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/main.cpp b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp new file mode 100644 index 000000000000000..8ba0312cd3a618b --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp @@ -0,0 +1,25 @@ +union Foo { + int val = 42; + static const int sVal1 = -42; + static Foo sVal2; +}; + +Foo Foo::sVal2{}; + +namespace { +union Bar { + int val = 137; + static const int sVal1 = -137; + static Bar sVal2; +}; + +Bar Bar::sVal2{}; +} // namespace + +int main() { + Foo foo; + Bar bar; + auto sum = Bar::sVal1 + Foo::sVal1 + Foo::sVal2.val + Bar::sVal2.val; + + return 0; +} From lldb-commits at lists.llvm.org Thu Oct 5 04:39:11 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 04:39:11 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea05f.170a0220.f676d.3697@mx.google.com> Michael137 wrote: Alternatively, we could start checking `DW_AT_external` again, at the cost of not supporting some GCC cases pre-DWARFv5 https://github.com/llvm/llvm-project/pull/68300 From 
lldb-commits at lists.llvm.org Thu Oct 5 04:39:40 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 04:39:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea07c.170a0220.e1370.38c2@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes **Background** Prior to DWARFv4, there was no clear normative text on how to handle static data members. Non-normative text suggested that compilers should use `DW_AT_external` to mark static data members of structures/unions. Clang does this consistently. However, GCC doesn't, e.g., when the structure/union is in an anonymous namespace (which is C++ standard conformant). Additionally, GCC never emits `DW_AT_data_member_location`s for union members (regardless of storage linkage and storage duration). Since DWARFv5 (issue 161118.1), static data members get emitted as `DW_TAG_variable`. LLDB used to differentiate between static and non-static members by checking the `DW_AT_external` flag and the absence of `DW_AT_data_member_location`. With D18008 LLDB started to pretend that union members always have a `0` `DW_AT_data_member_location` by default (because GCC never emits these locations). In D124409 LLDB stopped checking the `DW_AT_external` flag to account for the case where GCC doesn't emit the flag for types in anonymous namespaces; instead we only check for presence of `DW_AT_data_member_location`s. The combination of these changes then meant that LLDB would never correctly detect that a union has static data members. **Solution** Instead of unconditionally initializing the `member_byte_offset` to `0` specifically for union members, this patch proposes to check for both the absence of `DW_AT_data_member_location` and `DW_AT_declaration`, which consistently gets emitted for static data members on GCC and Clang. We initialize the `member_byte_offset` to `0` anyway if we determine it wasn't a static. So removing the special case for unions makes this code simpler to reason about. Long-term, we should just use DWARFv5's new representation for static data members. Fixes #68135 --- Full diff: https://github.com/llvm/llvm-project/pull/68300.diff 4 Files Affected: - (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp (+16-6) - (added) lldb/test/API/lang/cpp/union-static-data-members/Makefile (+3) - (added) lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py (+43) - (added) lldb/test/API/lang/cpp/union-static-data-members/main.cpp (+25) ``````````diff diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..ee35a7de80c1e18 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2482,8 +2482,9 @@ struct MemberAttributes { DWARFFormValue encoding_form; /// Indicates the byte offset of the word from the base address of the /// structure. - uint32_t member_byte_offset; + uint32_t member_byte_offset = UINT32_MAX; bool is_artificial = false; + bool is_declaration = false; }; /// Parsed form of all attributes that are relevant for parsing Objective-C @@ -2656,8 +2657,6 @@ DiscriminantValue &VariantPart::discriminant() { return this->_discriminant; } MemberAttributes::MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die, ModuleSP module_sp) { - member_byte_offset = (parent_die.Tag() == DW_TAG_union_type) ?
0 : UINT32_MAX; - DWARFAttributes attributes = die.GetAttributes(); for (size_t i = 0; i < attributes.Size(); ++i) { const dw_attr_t attr = attributes.AttributeAtIndex(i); @@ -2717,6 +2716,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, case DW_AT_artificial: is_artificial = form_value.Boolean(); break; + case DW_AT_declaration: + is_declaration = form_value.Boolean(); + break; default: break; } @@ -2923,10 +2925,18 @@ void DWARFASTParserClang::ParseSingleMember( if (class_is_objc_object_or_interface) attrs.accessibility = eAccessNone; - // Handle static members, which is any member that doesn't have a bit or a - // byte member offset. + // Handle static members, which are typically members without + // locations. However, GCC *never* emits DW_AT_data_member_location + // for static data members of unions. + // Non-normative text pre-DWARFv5 recommends marking static + // data members with an DW_AT_external flag. Clang emits this consistently + // whereas GCC emits it only for static data members if not part of an + // anonymous namespace. The flag that is consistently emitted for static + // data members is DW_AT_declaration, so we check it instead. + // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we can + // consistently detect them on both GCC and Clang without below heuristic. if (attrs.member_byte_offset == UINT32_MAX && - attrs.data_bit_offset == UINT64_MAX) { + attrs.data_bit_offset == UINT64_MAX && attrs.is_declaration) { Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference()); if (var_type) { diff --git a/lldb/test/API/lang/cpp/union-static-data-members/Makefile b/lldb/test/API/lang/cpp/union-static-data-members/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py new file mode 100644 index 000000000000000..47166636b12647c --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -0,0 +1,43 @@ +""" +Tests that frame variable and expr work for +C++ unions and their static data members. 
+""" +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil + +class CppUnionStaticMembersTestCase(TestBase): + def test(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + + self.expect("frame variable foo", substrs=["val = 42"]) + self.expect("frame variable bar", substrs=["val = 137"]) + + self.expect_expr("foo", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + self.expect_expr("bar", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) + + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") + self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + + @expectedFailureAll + def test_union_in_anon_namespace(self): + """Tests that frame variable and expr work + for union static data members in anonymous + namespaces""" + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") + self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/main.cpp b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp new file mode 100644 index 000000000000000..8ba0312cd3a618b --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp @@ -0,0 +1,25 @@ +union Foo { + int val = 42; + static const int sVal1 = -42; + static Foo sVal2; +}; + +Foo Foo::sVal2{}; + +namespace { +union Bar { + int val = 137; + static const int sVal1 = -137; + static Bar sVal2; +}; + +Bar Bar::sVal2{}; +} // namespace + +int main() { + Foo foo; + Bar bar; + auto sum = Bar::sVal1 + Foo::sVal1 + Foo::sVal2.val + Bar::sVal2.val; + + return 0; +} ``````````
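(To see the DIE shapes the patch distinguishes, one can dump the test binary's debug info. The output below is an abbreviated, hand-edited sketch; the exact attributes vary by compiler and DWARF version, which is precisely the GCC/Clang divergence described above.

```
$ clang++ -g -c main.cpp && llvm-dwarfdump --debug-info main.o
...
DW_TAG_member               # static data member: no location
  DW_AT_name                ("sVal1")
  DW_AT_declaration         (true)
  DW_AT_external            (true)    # Clang emits this; GCC does not always
DW_TAG_member               # non-static member: carries a location
  DW_AT_name                ("val")
  DW_AT_data_member_location (0x00)
```
)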
https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Thu Oct 5 04:41:58 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 04:41:58 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea106.630a0220.82380.3583@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Thu Oct 5 04:43:19 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 04:43:19 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea157.a70a0220.8fca3.2fa9@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Thu Oct 5 04:43:40 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 04:43:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea16c.630a0220.424c1.2f92@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Thu Oct 5 04:50:49 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 04:50:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea319.a70a0220.8a850.3120@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff 777a6e6f10b2b90496d248b7fa904fce834484be 30ef50b808a8458a60bbd3cdc52b866ee296b6ba -- lldb/test/API/lang/cpp/union-static-data-members/main.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp ``````````
View the diff from clang-format here. ``````````diff diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index ee35a7de80c1..436632473816 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2933,8 +2933,8 @@ void DWARFASTParserClang::ParseSingleMember( // whereas GCC emits it only for static data members if not part of an // anonymous namespace. The flag that is consistently emitted for static // data members is DW_AT_declaration, so we check it instead. - // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we can - // consistently detect them on both GCC and Clang without below heuristic. + // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we + // can consistently detect them on both GCC and Clang without below heuristic. if (attrs.member_byte_offset == UINT32_MAX && attrs.data_bit_offset == UINT64_MAX && attrs.is_declaration) { Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference()); ``````````
https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Thu Oct 5 05:08:03 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Thu, 05 Oct 2023 05:08:03 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651ea723.170a0220.ddda5.393e@mx.google.com> https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/68300 >From 30ef50b808a8458a60bbd3cdc52b866ee296b6ba Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Thu, 5 Oct 2023 12:13:12 +0100 Subject: [PATCH 1/2] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members **Background** Prior to DWARFv4, there was no clear normative text on how to handle static data members. Non-normative text suggested that compilers should use `DW_AT_external` to mark static data members of structures/unions. Clang does this consistently. However, GCC doesn't, e.g., when the structure/union is in an anonymous namespace (which is C++ standard conformant). Additionally, GCC never emits `DW_AT_data_member_location`s for union members (regardless of storage linkage and storage duration). Since DWARFv5 (issue 161118.1), static data members get emitted as `DW_TAG_variable`. LLDB used to differentiate between static and non-static members by checking the `DW_AT_external` flag and the absence of `DW_AT_data_member_location`. With D18008 LLDB started to pretend that union members always have a `0` `DW_AT_data_member_location` by default (because GCC never emits these locations). In D124409 LLDB stopped checking the `DW_AT_external` flag to account for the case where GCC doesn't emit the flag for types in anonymous namespaces; instead we only check for presence of `DW_AT_data_member_location`s. The combination of these changes then meant that LLDB would never correctly detect that a union has static data members. **Solution** Instead of unconditionally initializing the `member_byte_offset` to `0` specifically for union members, this patch proposes to check for both the absence of `DW_AT_data_member_location` and `DW_AT_declaration`, which consistently gets emitted for static data members on GCC and Clang. We initialize the `member_byte_offset` to `0` anyway if we determine it wasn't a static. So removing the special case for unions makes this code simpler to reason about. Long-term, we should just use DWARFv5's new representation for static data members.
Fixes #68135 --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 22 +++++++--- .../cpp/union-static-data-members/Makefile | 3 ++ .../TestCppUnionStaticMembers.py | 43 +++++++++++++++++++ .../cpp/union-static-data-members/main.cpp | 25 +++++++++++ 4 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 lldb/test/API/lang/cpp/union-static-data-members/Makefile create mode 100644 lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py create mode 100644 lldb/test/API/lang/cpp/union-static-data-members/main.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 37fb16d4e0351c9..ee35a7de80c1e18 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2482,8 +2482,9 @@ struct MemberAttributes { DWARFFormValue encoding_form; /// Indicates the byte offset of the word from the base address of the /// structure. - uint32_t member_byte_offset; + uint32_t member_byte_offset = UINT32_MAX; bool is_artificial = false; + bool is_declaration = false; }; /// Parsed form of all attributes that are relevant for parsing Objective-C @@ -2656,8 +2657,6 @@ DiscriminantValue &VariantPart::discriminant() { return this->_discriminant; } MemberAttributes::MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die, ModuleSP module_sp) { - member_byte_offset = (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX; - DWARFAttributes attributes = die.GetAttributes(); for (size_t i = 0; i < attributes.Size(); ++i) { const dw_attr_t attr = attributes.AttributeAtIndex(i); @@ -2717,6 +2716,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, case DW_AT_artificial: is_artificial = form_value.Boolean(); break; + case DW_AT_declaration: + is_declaration = form_value.Boolean(); + break; default: break; } @@ -2923,10 +2925,18 @@ void DWARFASTParserClang::ParseSingleMember( if (class_is_objc_object_or_interface) attrs.accessibility = eAccessNone; - // Handle static members, which is any member that doesn't have a bit or a - // byte member offset. + // Handle static members, which are typically members without + // locations. However, GCC *never* emits DW_AT_data_member_location + // for static data members of unions. + // Non-normative text pre-DWARFv5 recommends marking static + // data members with an DW_AT_external flag. Clang emits this consistently + // whereas GCC emits it only for static data members if not part of an + // anonymous namespace. The flag that is consistently emitted for static + // data members is DW_AT_declaration, so we check it instead. + // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we can + // consistently detect them on both GCC and Clang without below heuristic. 
if (attrs.member_byte_offset == UINT32_MAX && - attrs.data_bit_offset == UINT64_MAX) { + attrs.data_bit_offset == UINT64_MAX && attrs.is_declaration) { Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference()); if (var_type) { diff --git a/lldb/test/API/lang/cpp/union-static-data-members/Makefile b/lldb/test/API/lang/cpp/union-static-data-members/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py new file mode 100644 index 000000000000000..47166636b12647c --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -0,0 +1,43 @@ +""" +Tests that frame variable and expr work for +C++ unions and their static data members. +""" +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil + +class CppUnionStaticMembersTestCase(TestBase): + def test(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + + self.expect("frame variable foo", substrs=["val = 42"]) + self.expect("frame variable bar", substrs=["val = 137"]) + + self.expect_expr("foo", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + self.expect_expr("bar", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) + + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") + self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + + @expectedFailureAll + def test_union_in_anon_namespace(self): + """Tests that frame variable and expr work + for union static data members in anonymous + namespaces""" + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") + self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/main.cpp b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp new file mode 100644 index 000000000000000..8ba0312cd3a618b --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp @@ -0,0 +1,25 @@ +union Foo { + int val = 42; + static const int sVal1 = -42; + static Foo sVal2; +}; + +Foo Foo::sVal2{}; + +namespace { +union Bar { + int val = 137; + static const int sVal1 = -137; + static Bar sVal2; +}; + +Bar Bar::sVal2{}; +} // namespace + +int main() { + Foo foo; + Bar bar; + auto sum = Bar::sVal1 + Foo::sVal1 + Foo::sVal2.val + Bar::sVal2.val; + + return 0; +} >From a3135e3991d077d84e4bace52a85bfbfbe96cc31 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Thu, 5 Oct 2023 13:07:47 +0100 Subject: [PATCH 2/2] fixup! 
git clang-format --- lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index ee35a7de80c1e18..43663247381664c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2933,8 +2933,8 @@ void DWARFASTParserClang::ParseSingleMember( // whereas GCC emits it only for static data members if not part of an // anonymous namespace. The flag that is consistently emitted for static // data members is DW_AT_declaration, so we check it instead. - // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we can - // consistently detect them on both GCC and Clang without below heuristic. + // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we + // can consistently detect them on both GCC and Clang without below heuristic. if (attrs.member_byte_offset == UINT32_MAX && attrs.data_bit_offset == UINT64_MAX && attrs.is_declaration) { Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference()); From lldb-commits at lists.llvm.org Thu Oct 5 07:54:53 2023 From: lldb-commits at lists.llvm.org (Christian Kissig via lldb-commits) Date: Thu, 05 Oct 2023 07:54:53 -0700 (PDT) Subject: [Lldb-commits] [lldb] [Support] Add KnownBits::computeForSubBorrow (PR #67788) In-Reply-To: Message-ID: <651ece3d.170a0220.1f2ff.534f@mx.google.com> https://github.com/christiankissig updated https://github.com/llvm/llvm-project/pull/67788 >From 5d86936c3a48c613460983c980271fcab8128b75 Mon Sep 17 00:00:00 2001 From: Christian Kissig Date: Tue, 26 Sep 2023 12:18:59 +0000 Subject: [PATCH 1/5] [Support] Add KnownBits::computeForSubBorrow * Implements computeForSubBorrow as alias for computeforAddCarry. Borrow is expected to be 1-bit wide. * Adds exhaustive unit test. --- llvm/include/llvm/Support/KnownBits.h | 4 +++ llvm/lib/Support/KnownBits.cpp | 12 +++++++++ llvm/unittests/Support/KnownBitsTest.cpp | 31 ++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 8462aa11202d5d7..711ca8c12129a1b 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -332,6 +332,10 @@ struct KnownBits { static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS); + /// Compute known bits results from subtracting RHS from LHS. 
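+  /// Illustration: since LHS - RHS - Borrow == LHS + ~RHS + (1 - Borrow),
+  /// this inverts the known bits of RHS and flips a known borrow into the
+  /// complementary known carry before deferring to computeForAddCarry.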
+ static KnownBits computeForSubBorrow(const KnownBits &LHS, KnownBits RHS, + const KnownBits &Borrow); + /// Compute knownbits resulting from llvm.sadd.sat(LHS, RHS) static KnownBits sadd_sat(const KnownBits &LHS, const KnownBits &RHS); diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 097c22d33dd12ba..99ac50a34666fce 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -85,6 +85,18 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW, return KnownOut; } +KnownBits KnownBits::computeForSubBorrow(const KnownBits &LHS, KnownBits RHS, + const KnownBits &Borrow) { + assert(Borrow.getBitWidth() == 1 && "Borrow must be 1-bit"); + + // LHS - RHS = LHS + ~RHS + 1 + // Carry 1 - Borrow in ::computeForAddCarry + std::swap(RHS.Zero, RHS.One); + return ::computeForAddCarry(LHS, RHS, + /*CarryZero*/ Borrow.One.getBoolValue(), + /*CarryOne*/ Borrow.Zero.getBoolValue()); +} + KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const { unsigned BitWidth = getBitWidth(); assert(0 < SrcBitWidth && SrcBitWidth <= BitWidth && diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index 9d184beea3ba9e9..5597d69ab248d23 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -213,6 +213,37 @@ TEST(KnownBitsTest, AddSubExhaustive) { TestAddSubExhaustive(false); } +TEST(KnownBitsTest, SubBorrowExhaustive) { + unsigned Bits = 4; + ForeachKnownBits(Bits, [&](const KnownBits &Known1) { + ForeachKnownBits(Bits, [&](const KnownBits &Known2) { + ForeachKnownBits(1, [&](const KnownBits &KnownBorrow) { + // Explicitly compute known bits of the addition by trying all + // possibilities. + KnownBits Known(Bits); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + ForeachNumInKnownBits(Known1, [&](const APInt &N1) { + ForeachNumInKnownBits(Known2, [&](const APInt &N2) { + ForeachNumInKnownBits(KnownBorrow, [&](const APInt &Borrow) { + APInt Sub = N1 - N2; + if (Borrow.getBoolValue()) + --Sub; + + Known.One &= Sub; + Known.Zero &= ~Sub; + }); + }); + }); + + KnownBits KnownComputed = + KnownBits::computeForSubBorrow(Known1, Known2, KnownBorrow); + EXPECT_EQ(Known, KnownComputed); + }); + }); + }); +} + TEST(KnownBitsTest, BinaryExhaustive) { testBinaryOpExhaustive( [](const KnownBits &Known1, const KnownBits &Known2) { >From f84c882cf429df238054d88ee07e41a08ae3fd6c Mon Sep 17 00:00:00 2001 From: Christian Kissig Date: Tue, 26 Sep 2023 18:02:49 +0000 Subject: [PATCH 2/5] [CodeGen] Implement USUBC, USUBO_CARRY, and SSUBO_CARRY with KnownBits::computeForSubBorrow --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12 ++++++---- .../CodeGen/AArch64SelectionDAGTest.cpp | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index cd21af770e1a4d9..ab3e9b4bdc67402 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3732,14 +3732,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert(Op.getResNo() == 0 && "We only compute knownbits for the difference here."); - // TODO: Compute influence of the carry operand. + // With UADDO_CARRY and SSUBO_CARRY a borrow bit may be added in. + KnownBits Borrow(1); if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) - break; + // TODO: Compute known bits for the carry operand. 
Creates + // parity with UADDO_CARRY And SADDO_CARRY as of now. + Borrow.resetAll(); + else + Borrow.setAllZero(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, - Known, Known2); + Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow); break; } case ISD::UADDO: diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index 0e1f2736907fff8..303ee50a763fba3 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -273,6 +273,30 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_SUB) { EXPECT_EQ(Known.One, APInt(8, 0x1)); } +// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits. +TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_USUBO_CARRY) { + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto N0 = DAG->getConstant(0x5a, Loc, IntVT); + auto UnknownOp1 = DAG->getRegister(0, IntVT); // ???????? + auto Mask1_Zero = DAG->getConstant(0x8, Loc, IntVT); // 00001000 + auto Mask1_One = DAG->getConstant(0x20, Loc, IntVT); // 00100000 + // N1 = (???????? & 00001000) | 00100000 = 0010?000 + auto N1 = DAG->getNode(ISD::AND, Loc, IntVT, Mask1_Zero, UnknownOp1); + N1 = DAG->getNode(ISD::OR, Loc, IntVT, Mask1_One, N1); + auto UnknownOpC = DAG->getRegister(1, IntVT); + auto Op = DAG->getNode(ISD::USUBO_CARRY, Loc, IntVT, N0, N1, UnknownOpC); + // N0 = 01011010 + // N1 = 0010?000 + // C = ? + // => + // Known.Zero = 11000100 (0xc4) + // Known.One = 00110000 (0x30) + KnownBits Known = DAG->computeKnownBits(Op); + EXPECT_EQ(Known.Zero, APInt(8, 0xc4)); + EXPECT_EQ(Known.One, APInt(8, 0x30)); +} + TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_BUILD_VECTOR) { TargetLowering TL(*TM); >From 6355b54bafcec96770624a6a5df8e0c299be8f3f Mon Sep 17 00:00:00 2001 From: Christian Kissig Date: Wed, 27 Sep 2023 12:35:59 +0000 Subject: [PATCH 3/5] [CodeGen] Compute unknown bits for Carry/Borrow for ADD/SUB Computes known bits for carry/borrow for UADDO_CARRY and USUBO_CARRY Operations. Carry/borrow are expected to be 1-bit. 0 bits are padded with unknown. Adds a unit test for UADDO_CARRY and USUBO_CARRY. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 23 ++--- .../CodeGen/AArch64SelectionDAGTest.cpp | 95 +++++++++++++++++-- 2 files changed, 99 insertions(+), 19 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ab3e9b4bdc67402..5c01baa9dedff4e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3734,11 +3734,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // With UADDO_CARRY and SSUBO_CARRY a borrow bit may be added in. KnownBits Borrow(1); - if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) - // TODO: Compute known bits for the carry operand. Creates - // parity with UADDO_CARRY And SADDO_CARRY as of now. 
- Borrow.resetAll(); - else + if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) { + Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Borrow has bit width 1 + Borrow = Borrow.zextOrTrunc(1); + } else Borrow.setAllZero(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); @@ -3768,14 +3768,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (Opcode == ISD::ADDE) // Can't track carry from glue, set carry to unknown. Carry.resetAll(); - else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) - // TODO: Compute known bits for the carry operand. Not sure if it is worth - // the trouble (how often will we find a known carry bit). And I haven't - // tested this very much yet, but something like this might work: - // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); - // Carry = Carry.zextOrTrunc(1, false); - Carry.resetAll(); - else + else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) { + Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Carry has bit width 1 + Carry = Carry.zextOrTrunc(1); + } else Carry.setAllZero(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index 303ee50a763fba3..0430c74f7d17ca8 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -254,6 +254,59 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_ADD) { EXPECT_EQ(Known.One, APInt(8, 0x55)); } +// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits. +TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) { + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto UnknownOp = DAG->getRegister(0, IntVT); + auto Mask_Zero = DAG->getConstant(0x28, Loc, IntVT); + auto Mask_One = DAG->getConstant(0x20, Loc, IntVT); + auto N0 = DAG->getNode(ISD::AND, Loc, IntVT, Mask_Zero, UnknownOp); + N0 = DAG->getNode(ISD::OR, Loc, IntVT, Mask_One, N0); + auto N1 = DAG->getConstant(0x65, Loc, IntVT); + + KnownBits Known; + + auto UnknownBorrow = DAG->getRegister(1, IntVT); + auto OpUnknownBorrow = DAG->getNode( + ISD::UADDO_CARRY, Loc, IntVT, N0, N1, UnknownBorrow); + // N0 = 0010?000 + // N1 = 01100101 + // B = ? + // => + // Known.Zero = 01110000 (0x70) + // Known.One = 10000100 (0x84) + Known = DAG->computeKnownBits(OpUnknownBorrow); + EXPECT_EQ(Known.Zero, APInt(8, 0x70)); + EXPECT_EQ(Known.One, APInt(8, 0x84)); + + auto ZeroBorrow = DAG->getConstant(0x0, Loc, IntVT); + auto OpZeroBorrow = DAG->getNode( + ISD::UADDO_CARRY, Loc, IntVT, N0, N1, ZeroBorrow); + // N0 = 0010?000 + // N1 = 01100101 + // B = 0 + // => + // Known.Zero = 01110010 (0x72) + // Known.One = 10000101 (0x85) + Known = DAG->computeKnownBits(OpZeroBorrow); + EXPECT_EQ(Known.Zero, APInt(8, 0x72)); + EXPECT_EQ(Known.One, APInt(8, 0x85)); + + auto OneBorrow = DAG->getConstant(0x1, Loc, IntVT); + auto OpOneBorrow = DAG->getNode( + ISD::UADDO_CARRY, Loc, IntVT, N0, N1, OneBorrow); + // N0 = 0010?000 + // N1 = 01100101 + // B = 1 + // => + // Known.Zero = 01110001 (0x71) + // Known.One = 10000110 (0x86) + Known = DAG->computeKnownBits(OpOneBorrow); + EXPECT_EQ(Known.Zero, APInt(8, 0x71)); + EXPECT_EQ(Known.One, APInt(8, 0x86)); +} + // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits. 
TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_SUB) { SDLoc Loc; @@ -278,23 +331,53 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_USUBO_CARRY) { SDLoc Loc; auto IntVT = EVT::getIntegerVT(Context, 8); auto N0 = DAG->getConstant(0x5a, Loc, IntVT); - auto UnknownOp1 = DAG->getRegister(0, IntVT); // ???????? + auto UnknownOp = DAG->getRegister(0, IntVT); // ???????? auto Mask1_Zero = DAG->getConstant(0x8, Loc, IntVT); // 00001000 auto Mask1_One = DAG->getConstant(0x20, Loc, IntVT); // 00100000 // N1 = (???????? & 00001000) | 00100000 = 0010?000 - auto N1 = DAG->getNode(ISD::AND, Loc, IntVT, Mask1_Zero, UnknownOp1); + auto N1 = DAG->getNode(ISD::AND, Loc, IntVT, Mask1_Zero, UnknownOp); N1 = DAG->getNode(ISD::OR, Loc, IntVT, Mask1_One, N1); - auto UnknownOpC = DAG->getRegister(1, IntVT); - auto Op = DAG->getNode(ISD::USUBO_CARRY, Loc, IntVT, N0, N1, UnknownOpC); + + KnownBits Known; + + auto UnknownBorrow = DAG->getRegister(1, IntVT); + auto OpUnknownBorrow = DAG->getNode( + ISD::USUBO_CARRY, Loc, IntVT, N0, N1, UnknownBorrow); // N0 = 01011010 // N1 = 0010?000 - // C = ? + // B = ? // => // Known.Zero = 11000100 (0xc4) // Known.One = 00110000 (0x30) - KnownBits Known = DAG->computeKnownBits(Op); + Known = DAG->computeKnownBits(OpUnknownBorrow); EXPECT_EQ(Known.Zero, APInt(8, 0xc4)); EXPECT_EQ(Known.One, APInt(8, 0x30)); + + auto ZeroBorrow = DAG->getConstant(0x0, Loc, IntVT); + auto OpZeroBorrow = DAG->getNode( + ISD::USUBO_CARRY, Loc, IntVT, N0, N1, ZeroBorrow); + // N0 = 01011010 + // N1 = 0010?000 + // B = 0 + // => + // Known.Zero = 11000101 (0xc5) + // Known.One = 00110010 (0x32) + Known = DAG->computeKnownBits(OpZeroBorrow); + EXPECT_EQ(Known.Zero, APInt(8, 0xc5)); + EXPECT_EQ(Known.One, APInt(8, 0x32)); + + auto OneBorrow = DAG->getConstant(0x1, Loc, IntVT); + auto OpOneBorrow = DAG->getNode( + ISD::USUBO_CARRY, Loc, IntVT, N0, N1, OneBorrow); + // N0 = 01011010 + // N1 = 0010?000 + // B = 1 + // => + // Known.Zero = 11000110 (0xc6) + // Known.One = 00110001 (0x31) + Known = DAG->computeKnownBits(OpOneBorrow); + EXPECT_EQ(Known.Zero, APInt(8, 0xc6)); + EXPECT_EQ(Known.One, APInt(8, 0x31)); } TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_BUILD_VECTOR) { >From 108d899c04aa80e2d65fc34b49e40ae82594cf10 Mon Sep 17 00:00:00 2001 From: Christian Kissig Date: Thu, 28 Sep 2023 11:50:25 +0000 Subject: [PATCH 4/5] [CodeGen] Compute known bits of Carry/Borrow for UADDO, SADDO, USUBO, and SSUBO Adds computeKnownBits for Carry/Borrow for UADDO, SADDO, USUBO, and SSUBO. Carry over is expected to be of bit width 1. Adds unit tests for UADDO_CARRY and USUBO_CARRY. --- .../CodeGen/AArch64SelectionDAGTest.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index 0430c74f7d17ca8..bb8e76a2eeb8beb 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -268,8 +268,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) { KnownBits Known; auto UnknownBorrow = DAG->getRegister(1, IntVT); - auto OpUnknownBorrow = DAG->getNode( - ISD::UADDO_CARRY, Loc, IntVT, N0, N1, UnknownBorrow); + auto OpUnknownBorrow = + DAG->getNode(ISD::UADDO_CARRY, Loc, IntVT, N0, N1, UnknownBorrow); // N0 = 0010?000 // N1 = 01100101 // B = ? 
@@ -281,8 +281,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) { EXPECT_EQ(Known.One, APInt(8, 0x84)); auto ZeroBorrow = DAG->getConstant(0x0, Loc, IntVT); - auto OpZeroBorrow = DAG->getNode( - ISD::UADDO_CARRY, Loc, IntVT, N0, N1, ZeroBorrow); + auto OpZeroBorrow = + DAG->getNode(ISD::UADDO_CARRY, Loc, IntVT, N0, N1, ZeroBorrow); // N0 = 0010?000 // N1 = 01100101 // B = 0 @@ -294,8 +294,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) { EXPECT_EQ(Known.One, APInt(8, 0x85)); auto OneBorrow = DAG->getConstant(0x1, Loc, IntVT); - auto OpOneBorrow = DAG->getNode( - ISD::UADDO_CARRY, Loc, IntVT, N0, N1, OneBorrow); + auto OpOneBorrow = + DAG->getNode(ISD::UADDO_CARRY, Loc, IntVT, N0, N1, OneBorrow); // N0 = 0010?000 // N1 = 01100101 // B = 1 @@ -304,7 +304,7 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) { // Known.One = 10000110 (0x86) Known = DAG->computeKnownBits(OpOneBorrow); EXPECT_EQ(Known.Zero, APInt(8, 0x71)); - EXPECT_EQ(Known.One, APInt(8, 0x86)); + EXPECT_EQ(Known.One, APInt(8, 0x86)); } // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits. @@ -331,7 +331,7 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_USUBO_CARRY) { SDLoc Loc; auto IntVT = EVT::getIntegerVT(Context, 8); auto N0 = DAG->getConstant(0x5a, Loc, IntVT); - auto UnknownOp = DAG->getRegister(0, IntVT); // ???????? + auto UnknownOp = DAG->getRegister(0, IntVT); // ???????? auto Mask1_Zero = DAG->getConstant(0x8, Loc, IntVT); // 00001000 auto Mask1_One = DAG->getConstant(0x20, Loc, IntVT); // 00100000 // N1 = (???????? & 00001000) | 00100000 = 0010?000 @@ -341,8 +341,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_USUBO_CARRY) { KnownBits Known; auto UnknownBorrow = DAG->getRegister(1, IntVT); - auto OpUnknownBorrow = DAG->getNode( - ISD::USUBO_CARRY, Loc, IntVT, N0, N1, UnknownBorrow); + auto OpUnknownBorrow = + DAG->getNode(ISD::USUBO_CARRY, Loc, IntVT, N0, N1, UnknownBorrow); // N0 = 01011010 // N1 = 0010?000 // B = ? 
@@ -354,8 +354,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_USUBO_CARRY) { EXPECT_EQ(Known.One, APInt(8, 0x32)); auto OneBorrow = DAG->getConstant(0x1, Loc, IntVT); - auto OpOneBorrow = DAG->getNode( - ISD::USUBO_CARRY, Loc, IntVT, N0, N1, OneBorrow); + auto OpOneBorrow = + DAG->getNode(ISD::USUBO_CARRY, Loc, IntVT, N0, N1, OneBorrow); // N0 = 01011010 // N1 = 0010?000 // B = 1 >From 3e91c2b0798659e48643fe3806200fb18afc812a Mon Sep 17 00:00:00 2001 From: Christian Kissig Date: Fri, 29 Sep 2023 13:01:14 +0000 Subject: [PATCH 5/5] [CodeGen] Fix typo --- llvm/unittests/Support/KnownBitsTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index 5597d69ab248d23..c0377d45c303a11 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -218,7 +218,7 @@ TEST(KnownBitsTest, SubBorrowExhaustive) { ForeachKnownBits(Bits, [&](const KnownBits &Known1) { ForeachKnownBits(Bits, [&](const KnownBits &Known2) { ForeachKnownBits(1, [&](const KnownBits &KnownBorrow) { - // Explicitly compute known bits of the addition by trying all + // Explicitly compute known bits of the subtraction by trying all // possibilities. KnownBits Known(Bits); Known.Zero.setAllBits(); Known.One.setAllBits(); From lldb-commits at lists.llvm.org Thu Oct 5 09:46:15 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 09:46:15 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Add support for updating string during debug process (PR #67782) In-Reply-To: Message-ID: <651ee857.630a0220.f5952.4a1d@mx.google.com> jimingham wrote: I don't know what the status of lldb-mi is, but lldb-vscode, which does the same job (being a DAP server), is under active development. So "this adaptor doesn't use the better method to do X" shouldn't be a reason not to employ the better method. We should just fix the adaptors. I entertained the idea of "value setting summaries" but rejected it after some thought. Summaries are unstructured short developer notes about the value; they really aren't suitable as general stand-ins for the value. So that's really not an appropriate route for changing values. But the Synthetic Child Providers are a re-presentation of the value, so they are a suitable route for changing values of more complex object types. I haven't read carefully through the ValueObject printer in a while, but we do compress the printing of "small" structures in some cases, so it should be possible to get it not to expand this output. I think people will not see it as a positive if these common objects start taking up more space, so it would be good to investigate how to do that. https://github.com/llvm/llvm-project/pull/67782 From lldb-commits at lists.llvm.org Thu Oct 5 09:50:36 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 09:50:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651ee95c.050a0220.535ee.4d3b@mx.google.com> goldsteinn wrote: Continue to LGTM...
https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Thu Oct 5 10:00:59 2023 From: lldb-commits at lists.llvm.org (Stanislav Mekhanoshin via lldb-commits) Date: Thu, 05 Oct 2023 10:00:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add another SIFoldOperands instance after shrink (PR #67878) In-Reply-To: Message-ID: <651eebcb.170a0220.b13de.5c86@mx.google.com> rampitec wrote: > I've taken another look at this. The patch does not show any benefit from running another `SIFoldOperands` pass _after_ `SIShrinkInstructions` per se; you get exactly the same results (modulo a couple of add instructions that have their operands commuted differently) if you put the second `SIFoldOperands` run _before_ `SIShrinkInstructions` instead. > > In other words `SIFoldOperands` is not idempotent, and the reason for that seems to be: > > > And the reason it only happens for some SUBREV instructions is even more convoluted. It's because SIFoldOperands will sometimes shrink V_SUB_CO_U32_e64 to V_SUBREV_CO_U32_e32 even if it does not manage to fold anything into it. This does seem wrong and is probably worth a closer look. > > This goes back to https://reviews.llvm.org/D51345. Notice how the code that was added to `updateOperand` does the shrinking but does not actually do any folding; it returns before we get to `Old.ChangeToImmediate`/`Old.substVirtReg`. A second run of `SIFoldOperands` will see the shrunk instruction and fold into it. Yes, this mostly affects old targets without no-carry add/sub, and the impact is on these 2 instructions, which need to be shrunk before folding. That said, the fold operands pass's shrinking capabilities are really limited compared to the shrink pass. https://github.com/llvm/llvm-project/pull/67878 From lldb-commits at lists.llvm.org Thu Oct 5 10:01:13 2023 From: lldb-commits at lists.llvm.org (Nikita Popov via lldb-commits) Date: Thu, 05 Oct 2023 10:01:13 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651eebd9.170a0220.7b097.5bac@mx.google.com> https://github.com/nikic approved this pull request. Basically LGTM, but I think this is still missing negative tests for non-equality pred1/pred2? https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Thu Oct 5 11:21:00 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 11:21:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651efe8c.170a0220.216e0.63c2@mx.google.com> ================ @@ -340,6 +340,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions.
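+// For illustration: callers add the returned value to
+// OutlineInfo::ExcludeArgsFromAggregate, so the extractor materializes it as
+// a separate argument of the outlined function instead of packing it into
+// the argument aggregate; the fake instructions are erased once outlining
+// has produced the real argument.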
+Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { + FakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); + ToBeDeleted.push(FakeVal); ---------------- shraiysh wrote: The address is inserted to be deleted once at the beginning of this function. If AsPtr is true, control never comes here. If false, then the load instruction is added here. It wouldn't get deleted twice. https://github.com/llvm/llvm-project/pull/67723 From lldb-commits at lists.llvm.org Thu Oct 5 11:54:49 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 11:54:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [OpenMPIRBuilder] Remove wrapper function in `createTask`, `createTeams` (PR #67723) In-Reply-To: Message-ID: <651f0679.170a0220.4deb1.69c4@mx.google.com> https://github.com/shraiysh updated https://github.com/llvm/llvm-project/pull/67723 >From 6aabc3c10ea2d587120b74966b7ce96f9b8167af Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Thu, 28 Sep 2023 13:35:07 -0500 Subject: [PATCH 1/6] [OpenMPIRBuilder] Remove wrapper function in `createTask` This patch removes the wrapper function in `OpenMPIRBuilder::createTask`. The outlined function is directly of the form that is expected by the runtime library calls. This also fixes the global thread ID argument, which should be used whenever `kmpc_global_thread_num()` is called inside the outlined function. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 106 ++++++++---------- .../Frontend/OpenMPIRBuilderTest.cpp | 56 +++++---- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 51 +++------ 3 files changed, 99 insertions(+), 114 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9c70d384e55db2b..54012b488c6b671 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -1496,6 +1497,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { + // We create a temporary i32 value that will represent the global tid after + // outlining. 
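+  // (The alloca/load pair below is a fake use: it models the global thread
+  // ID so the extractor emits it as a distinct argument, see
+  // ExcludeArgsFromAggregate below, and both instructions are erased after
+  // outlining.)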
+ SmallVector ToBeDeleted; + Builder.restoreIP(AllocaIP); + AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); + LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"); + ToBeDeleted.append({TID, TIDAddr}); + if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1523,41 +1532,27 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, BasicBlock *TaskAllocaBB = splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); + // Fake use of TID + Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); + BinaryOperator *AddInst = + dyn_cast(Builder.CreateAdd(TID, Builder.getInt32(10))); + ToBeDeleted.push_back(AddInst); + OutlineInfo OI; OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, - Dependencies](Function &OutlinedFn) { - // The input IR here looks like the following- - // ``` - // func @current_fn() { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - // - // This is changed to the following- - // - // ``` - // func @current_fn() { - // runtime_call(..., wrapper_fn, ...) - // } - // func @wrapper_fn(..., %args) { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - - // The stale call instruction will be replaced with a new call instruction - // for runtime call with a wrapper function. + OI.ExcludeArgsFromAggregate = {TID}; + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) { + // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); // HasShareds is true if any variables are captured in the outlined region, // false otherwise. - bool HasShareds = StaleCI->arg_size() > 0; + bool HasShareds = StaleCI->arg_size() > 1; Builder.SetInsertPoint(StaleCI); // Gather the arguments for emitting the runtime call for @@ -1595,7 +1590,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Value *SharedsSize = Builder.getInt64(0); if (HasShareds) { AllocaInst *ArgStructAlloca = - dyn_cast(StaleCI->getArgOperand(0)); + dyn_cast(StaleCI->getArgOperand(1)); assert(ArgStructAlloca && "Unable to find the alloca instruction corresponding to arguments " "for extracted function"); @@ -1606,31 +1601,17 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, SharedsSize = Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); } - - // Argument - task_entry (the wrapper function) - // If the outlined function has some captured variables (i.e. HasShareds is - // true), then the wrapper function will have an additional argument (the - // struct containing captured variables). Otherwise, no such argument will - // be present. 
- SmallVector WrapperArgTys{Builder.getInt32Ty()}; - if (HasShareds) - WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType()); - FunctionCallee WrapperFuncVal = M.getOrInsertFunction( - (Twine(OutlinedFn.getName()) + ".wrapper").str(), - FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false)); - Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); - // Emit the @__kmpc_omp_task_alloc runtime call // The runtime call returns a pointer to an area where the task captured // variables must be copied before the task is run (TaskData) CallInst *TaskData = Builder.CreateCall( TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags, /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize, - /*task_func=*/WrapperFunc}); + /*task_func=*/&OutlinedFn}); // Copy the arguments for outlined function if (HasShareds) { - Value *Shareds = StaleCI->getArgOperand(0); + Value *Shareds = StaleCI->getArgOperand(1); Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData); Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment, @@ -1697,10 +1678,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, if (IfCondition) { // `SplitBlockAndInsertIfThenElse` requires the block to have a // terminator. - BasicBlock *NewBasicBlock = - splitBB(Builder, /*CreateBranch=*/true, "if.end"); + splitBB(Builder, /*CreateBranch=*/true, "if.end"); Instruction *IfTerminator = - NewBasicBlock->getSinglePredecessor()->getTerminator(); + Builder.GetInsertPoint()->getParent()->getTerminator(); Instruction *ThenTI = IfTerminator, *ElseTI = nullptr; Builder.SetInsertPoint(IfTerminator); SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI, @@ -1711,10 +1691,12 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Function *TaskCompleteFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0); Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData}); + CallInst *CI = nullptr; if (HasShareds) - Builder.CreateCall(WrapperFunc, {ThreadID, TaskData}); + CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData}); else - Builder.CreateCall(WrapperFunc, {ThreadID}); + CI = Builder.CreateCall(&OutlinedFn, {ThreadID}); + CI->setDebugLoc(StaleCI->getDebugLoc()); Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData}); Builder.SetInsertPoint(ThenTI); } @@ -1736,18 +1718,28 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, StaleCI->eraseFromParent(); - // Emit the body for wrapper function - BasicBlock *WrapperEntryBB = - BasicBlock::Create(M.getContext(), "", WrapperFunc); - Builder.SetInsertPoint(WrapperEntryBB); + Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); if (HasShareds) { - llvm::Value *Shareds = - Builder.CreateLoad(VoidPtr, WrapperFunc->getArg(1)); - Builder.CreateCall(&OutlinedFn, {Shareds}); - } else { - Builder.CreateCall(&OutlinedFn); + LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1)); + OutlinedFn.getArg(1)->replaceUsesWithIf( + Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); + } + + // Replace kmpc_global_thread_num() calls with the global thread id + // argument. 
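+    // (For example, a '%tid = call i32 @__kmpc_global_thread_num(...)'
+    // inside the outlined body has its uses rewired to the %global.tid
+    // parameter.)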
+ OutlinedFn.getArg(0)->setName("global.tid"); + FunctionCallee TIDRTLFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); + for (Instruction &Inst : instructions(OutlinedFn)) { + CallInst *CI = dyn_cast(&Inst); + if (!CI) + continue; + if (CI->getCalledFunction() == TIDRTLFn.getCallee()) + CI->replaceAllUsesWith(OutlinedFn.getArg(0)); } - Builder.CreateRet(Builder.getInt32(0)); + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fd524f6067ee0ea..643b34270c01693 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5486,25 +5486,28 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { 24); // 64-bit pointer + 128-bit integer // Verify Wrapper function - Function *WrapperFunc = + Function *OutlinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); - ASSERT_NE(WrapperFunc, nullptr); + ASSERT_NE(OutlinedFn, nullptr); - LoadInst *SharedsLoad = dyn_cast(WrapperFunc->begin()->begin()); + LoadInst *SharedsLoad = dyn_cast(OutlinedFn->begin()->begin()); ASSERT_NE(SharedsLoad, nullptr); - EXPECT_EQ(SharedsLoad->getPointerOperand(), WrapperFunc->getArg(1)); - - EXPECT_FALSE(WrapperFunc->isDeclaration()); - CallInst *OutlinedFnCall = - dyn_cast(++WrapperFunc->begin()->begin()); - ASSERT_NE(OutlinedFnCall, nullptr); - EXPECT_EQ(WrapperFunc->getArg(0)->getType(), Builder.getInt32Ty()); - EXPECT_EQ(OutlinedFnCall->getArgOperand(0), - WrapperFunc->getArg(1)->uses().begin()->getUser()); + EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1)); + + EXPECT_FALSE(OutlinedFn->isDeclaration()); + EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty()); + + // Verify that the data argument is used only once, and that too in the load + // instruction that is then used for accessing shared data. + Value *DataPtr = OutlinedFn->getArg(1); + EXPECT_EQ(DataPtr->getNumUses(), 1); + EXPECT_TRUE(isa(DataPtr->uses().begin()->getUser())); + Value *Data = DataPtr->uses().begin()->getUser(); + EXPECT_TRUE(all_of(Data->uses(), [](Use &U) { + return isa(U.getUser()); + })); // Verify the presence of `trunc` and `icmp` instructions in Outlined function - Function *OutlinedFn = OutlinedFnCall->getCalledFunction(); - ASSERT_NE(OutlinedFn, nullptr); EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) { return isa(&inst); })); EXPECT_TRUE(any_of(instructions(OutlinedFn), @@ -5547,6 +5550,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { Builder.CreateRetVoid(); EXPECT_FALSE(verifyModule(*M, &errs())); + + // Check that the outlined function has only one argument. 
+ CallInst *TaskAllocCall = dyn_cast( + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) + ->user_back()); + Function *OutlinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)); + ASSERT_NE(OutlinedFn, nullptr); + ASSERT_EQ(OutlinedFn->arg_size(), 1); } TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { @@ -5658,8 +5669,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { F->setName("func"); IRBuilder<> Builder(BB); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; - IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); Value *Final = Builder.CreateICmp( CmpInst::Predicate::ICMP_EQ, F->getArg(0), @@ -5711,8 +5722,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { F->setName("func"); IRBuilder<> Builder(BB); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; - IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); Value *IfCondition = Builder.CreateICmp( CmpInst::Predicate::ICMP_EQ, F->getArg(0), @@ -5758,15 +5769,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { ->user_back()); ASSERT_NE(TaskBeginIfCall, nullptr); ASSERT_NE(TaskCompleteCall, nullptr); - Function *WrapperFunc = + Function *OulinedFn = dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); - ASSERT_NE(WrapperFunc, nullptr); - CallInst *WrapperFuncCall = dyn_cast(WrapperFunc->user_back()); - ASSERT_NE(WrapperFuncCall, nullptr); + ASSERT_NE(OulinedFn, nullptr); + CallInst *OulinedFnCall = dyn_cast(OulinedFn->user_back()); + ASSERT_NE(OulinedFnCall, nullptr); EXPECT_EQ(TaskBeginIfCall->getParent(), IfConditionBranchInst->getSuccessor(1)); - EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), WrapperFuncCall); - EXPECT_EQ(WrapperFuncCall->getNextNonDebugInstruction(), TaskCompleteCall); + + EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); + EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); } TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 28b0113a19d61b8..2cd561cb021075f 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2209,7 +2209,7 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, - // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) omp.task { %n = llvm.mlir.constant(1 : i64) : i64 @@ -2222,7 +2222,7 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { llvm.return } -// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]]) // CHECK: task.alloca{{.*}}: // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: @@ -2236,12 +2236,6 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: [[exit_stub]]: // CHECK: ret void - -// 
CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { -// CHECK: call void @[[outlined_fn]]() -// CHECK: ret i32 0 -// CHECK: } - // ----- // CHECK-LABEL: define void @omp_task_with_deps @@ -2259,7 +2253,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, - // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]], {{.*}}) omp.task depend(taskdependin -> %zaddr : !llvm.ptr) { %n = llvm.mlir.constant(1 : i64) : i64 @@ -2272,7 +2266,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { llvm.return } -// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]]) // CHECK: task.alloca{{.*}}: // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: @@ -2286,11 +2280,6 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: [[exit_stub]]: // CHECK: ret void -// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { -// CHECK: call void @[[outlined_fn]]() -// CHECK: ret i32 0 -// CHECK: } - // ----- // CHECK-LABEL: define void @omp_task @@ -2304,7 +2293,7 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 16, - // CHECK-SAME: ptr @[[wrapper_fn:.+]]) + // CHECK-SAME: ptr @[[outlined_fn:.+]]) // CHECK: %[[shareds:.+]] = load ptr, ptr %[[task_data]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.+}} %[[shareds]], ptr {{.+}}, i64 16, i1 false) // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) @@ -2321,8 +2310,9 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { } } -// CHECK: define internal void @[[outlined_fn:.+]](ptr %[[task_data:.+]]) +// CHECK: define internal void @[[outlined_fn]](i32 %[[global_tid:[^ ,]+]], ptr %[[task_data:.+]]) // CHECK: task.alloca{{.*}}: +// CHECK: %[[shareds:.+]] = load ptr, ptr %[[task_data]] // CHECK: br label %[[task_body:[^, ]+]] // CHECK: [[task_body]]: // CHECK: br label %[[task_region:[^, ]+]] @@ -2333,13 +2323,6 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // CHECK: [[exit_stub]]: // CHECK: ret void - -// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, ptr %[[task_data:.+]]) { -// CHECK: %[[shareds:.+]] = load ptr, ptr %1, align 8 -// CHECK: call void @[[outlined_fn]](ptr %[[shareds]]) -// CHECK: ret i32 0 -// CHECK: } - // ----- llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { @@ -2355,14 +2338,12 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { } // CHECK-LABEL: @par_task_ -// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @par_task_..omp_par.wrapper) +// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]]) -// CHECK-LABEL: define internal void @par_task_..omp_par +// CHECK: define internal void @[[task_outlined_fn]] // CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8 -// CHECK: call void ({{.*}}) 
@__kmpc_fork_call({{.*}}, ptr @par_task_..omp_par..omp_par, ptr %[[ARG_ALLOC]]) -// CHECK: define internal void @par_task_..omp_par..omp_par -// CHECK: define i32 @par_task_..omp_par.wrapper -// CHECK: call void @par_task_..omp_par +// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]]) +// CHECK: define internal void @[[parallel_outlined_fn]] // ----- llvm.func @foo() -> () @@ -2432,7 +2413,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 40, i64 0, ptr @omp_taskgroup_task..omp_par.wrapper) +// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 40, i64 0, ptr @[[outlined_task_fn:.+]]) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], ptr %[[t1_alloc]]) // CHECK: br label %[[task_exit:[^,]+]] // CHECK: [[task_exit]]: @@ -2445,7 +2426,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: %[[gep3:.+]] = getelementptr { i32, i32, ptr }, ptr %[[structArg]], i32 0, i32 2 // CHECK: store ptr %[[zaddr]], ptr %[[gep3]], align 8 // CHECK: %[[omp_global_thread_num_t2:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 40, i64 16, ptr @omp_taskgroup_task..omp_par.1.wrapper) +// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 40, i64 16, ptr @[[outlined_task_fn:.+]]) // CHECK: %[[shareds:.+]] = load ptr, ptr %[[t2_alloc]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[shareds]], ptr align 1 %[[structArg]], i64 16, i1 false) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], ptr %[[t2_alloc]]) @@ -2617,7 +2598,7 @@ llvm.func @omp_task_final(%boolexpr: i1) { // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) // CHECK: %[[final_flag:.+]] = select i1 %[[boolexpr]], i32 2, i32 0 // CHECK: %[[task_flags:.+]] = or i32 %[[final_flag]], 1 -// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 %[[task_flags]], i64 40, i64 0, ptr @omp_task_final..omp_par.wrapper) +// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 %[[task_flags]], i64 40, i64 0, ptr @[[task_outlined_fn:.+]]) // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[task_exit:[^,]+]] // CHECK: [[task_exit]]: @@ -2648,14 +2629,14 @@ llvm.func @omp_task_if(%boolexpr: i1) { // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 0, ptr @omp_task_if..omp_par.wrapper) +// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 0, ptr @[[task_outlined_fn:.+]]) // CHECK: br i1 %[[boolexpr]], label 
%[[true_label:[^,]+]], label %[[false_label:[^,]+]] // CHECK: [[true_label]]: // CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit:[^,]+]] // CHECK: [[false_label:[^,]+]]: ; preds = %codeRepl // CHECK: call void @__kmpc_omp_task_begin_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) -// CHECK: %{{.+}} = call i32 @omp_task_if..omp_par.wrapper(i32 %[[omp_global_thread_num]]) +// CHECK: call void @[[task_outlined_fn]](i32 %[[omp_global_thread_num]]) // CHECK: call void @__kmpc_omp_task_complete_if0(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) // CHECK: br label %[[if_else_exit]] // CHECK: [[if_else_exit]]: >From a1a9438b5e00170030b419a7736053422745cbc6 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:22:30 -0500 Subject: [PATCH 2/6] Remove outlining for teams too. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 178 +++++++++--------- .../Frontend/OpenMPIRBuilderTest.cpp | 22 +-- 2 files changed, 95 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 54012b488c6b671..a5a73bcc10c48e3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -341,6 +341,44 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +// This function creates a fake integer value and a fake use for the integer +// value. It returns the fake value created. This is useful in modeling the +// extra arguments to the outlined functions. +Value *createFakeIntVal(IRBuilder<> &Builder, + OpenMPIRBuilder::InsertPointTy OuterAllocaIP, + std::stack &ToBeDeleted, + OpenMPIRBuilder::InsertPointTy InnerAllocaIP, + const Twine &Name = "", bool AsPtr = true) { + Builder.restoreIP(OuterAllocaIP); + Instruction *FakeVal; + AllocaInst *FakeValAddr = + Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); + ToBeDeleted.push(FakeValAddr); + + if (AsPtr) + FakeVal = FakeValAddr; + else { + FakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); + ToBeDeleted.push(FakeVal); + } + + // We only need TIDAddr and ZeroAddr for modeling purposes to get the + // associated arguments in the outlined function, so we delete them later. + + // Fake use of TID + Builder.restoreIP(InnerAllocaIP); + Instruction *UseFakeVal; + if (AsPtr) + UseFakeVal = + Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use"); + else + UseFakeVal = + cast(Builder.CreateAdd(FakeVal, Builder.getInt32(10))); + ToBeDeleted.push(UseFakeVal); + return FakeVal; +} + //===----------------------------------------------------------------------===// // OpenMPIRBuilderConfig //===----------------------------------------------------------------------===// @@ -1497,13 +1535,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, SmallVector Dependencies) { - // We create a temporary i32 value that will represent the global tid after - // outlining. 
- SmallVector ToBeDeleted; - Builder.restoreIP(AllocaIP); - AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); - LoadInst *TID = Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"); - ToBeDeleted.append({TID, TIDAddr}); if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1532,19 +1563,24 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, BasicBlock *TaskAllocaBB = splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); - // Fake use of TID - Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin()); - BinaryOperator *AddInst = - dyn_cast(Builder.CreateAdd(TID, Builder.getInt32(10))); - ToBeDeleted.push_back(AddInst); + InsertPointTy TaskAllocaIP = + InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); + InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); + BodyGenCB(TaskAllocaIP, TaskBodyIP); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); OutlineInfo OI; OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.ExcludeArgsFromAggregate = {TID}; + + // Add the thread ID argument. + std::stack ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, - TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) { + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable { // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); @@ -1670,7 +1706,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, // br label %exit // else: // call @__kmpc_omp_task_begin_if0(...) - // call @wrapper_fn(...) + // call @outlined_fn(...) // call @__kmpc_omp_task_complete_if0(...) // br label %exit // exit: @@ -1725,31 +1761,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); } - // Replace kmpc_global_thread_num() calls with the global thread id - // argument. - OutlinedFn.getArg(0)->setName("global.tid"); - FunctionCallee TIDRTLFn = - getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); - for (Instruction &Inst : instructions(OutlinedFn)) { - CallInst *CI = dyn_cast(&Inst); - if (!CI) - continue; - if (CI->getCalledFunction() == TIDRTLFn.getCallee()) - CI->replaceAllUsesWith(OutlinedFn.getArg(0)); + while (!ToBeDeleted.empty()) { + ToBeDeleted.top()->eraseFromParent(); + ToBeDeleted.pop(); } - - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); - InsertPointTy TaskAllocaIP = - InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); - InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); - BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); - return Builder.saveIP(); } @@ -5740,6 +5759,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry"); Builder.SetInsertPoint(BodyBB, BodyBB->begin()); } + InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin()); // The current basic block is split into four basic blocks. After outlining, // they will be mapped as follows: @@ -5763,84 +5783,62 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BasicBlock *AllocaBB = splitBB(Builder, /*CreateBranch=*/true, "teams.alloca"); + // Generate the body of teams. 
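+  // (AllocaIP points into AllocaBB, which becomes the entry block of the
+  // outlined teams function; CodeGenIP points into the body block.)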
+ InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); + InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); + BodyGenCB(AllocaIP, CodeGenIP); + OutlineInfo OI; OI.EntryBB = AllocaBB; OI.ExitBB = ExitBB; OI.OuterAllocaBB = &OuterAllocaBB; - OI.PostOutlineCB = [this, Ident](Function &OutlinedFn) { - // The input IR here looks like the following- - // ``` - // func @current_fn() { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` - // - // This is changed to the following- - // - // ``` - // func @current_fn() { - // runtime_call(..., wrapper_fn, ...) - // } - // func @wrapper_fn(..., %args) { - // outlined_fn(%args) - // } - // func @outlined_fn(%args) { ... } - // ``` + // Insert fake values for global tid and bound tid. + std::stack ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true)); + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true)); + + OI.PostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable { // The stale call instruction will be replaced with a new call instruction - // for runtime call with a wrapper function. + // for runtime call with the outlined function. assert(OutlinedFn.getNumUses() == 1 && "there must be a single user for the outlined function"); CallInst *StaleCI = cast(OutlinedFn.user_back()); + ToBeDeleted.push(StaleCI); + + assert((OutlinedFn.arg_size() == 2 || OutlinedFn.arg_size() == 3) && + "Outlined function must have two or three arguments only"); - // Create the wrapper function. - SmallVector WrapperArgTys{Builder.getPtrTy(), Builder.getPtrTy()}; - for (auto &Arg : OutlinedFn.args()) - WrapperArgTys.push_back(Arg.getType()); - FunctionCallee WrapperFuncVal = M.getOrInsertFunction( - (Twine(OutlinedFn.getName()) + ".teams").str(), - FunctionType::get(Builder.getVoidTy(), WrapperArgTys, false)); - Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); - WrapperFunc->getArg(0)->setName("global_tid"); - WrapperFunc->getArg(1)->setName("bound_tid"); - if (WrapperFunc->arg_size() > 2) - WrapperFunc->getArg(2)->setName("data"); - - // Emit the body of the wrapper function - just a call to outlined function - // and return statement. - BasicBlock *WrapperEntryBB = - BasicBlock::Create(M.getContext(), "entrybb", WrapperFunc); - Builder.SetInsertPoint(WrapperEntryBB); - SmallVector Args; - for (size_t ArgIndex = 2; ArgIndex < WrapperFunc->arg_size(); ArgIndex++) - Args.push_back(WrapperFunc->getArg(ArgIndex)); - Builder.CreateCall(&OutlinedFn, Args); - Builder.CreateRetVoid(); - - OutlinedFn.addFnAttr(Attribute::AttrKind::AlwaysInline); + bool HasShared = OutlinedFn.arg_size() == 3; + + OutlinedFn.getArg(0)->setName("global.tid.ptr"); + OutlinedFn.getArg(1)->setName("bound.tid.ptr"); + if (HasShared) + OutlinedFn.getArg(2)->setName("data"); // Call to the runtime function for teams in the current function. 
assert(StaleCI && "Error while outlining - no CallInst user found for the " "outlined function."); Builder.SetInsertPoint(StaleCI); - Args = {Ident, Builder.getInt32(StaleCI->arg_size()), WrapperFunc}; - for (Use &Arg : StaleCI->args()) - Args.push_back(Arg); + SmallVector Args = {Ident, Builder.getInt32(StaleCI->arg_size()), + &OutlinedFn}; + if (HasShared) + Args.push_back(StaleCI->getArgOperand(2)); Builder.CreateCall(getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_fork_teams), Args); - StaleCI->eraseFromParent(); + + while (!ToBeDeleted.empty()) { + ToBeDeleted.top()->eraseFromParent(); + ToBeDeleted.pop(); + } }; addOutlineInfo(std::move(OI)); - // Generate the body of teams. - InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); - InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - BodyGenCB(AllocaIP, CodeGenIP); - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Builder.saveIP(); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 643b34270c01693..c4b0389c89c7c60 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -4057,25 +4057,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) { ASSERT_NE(SrcSrc, nullptr); // Verify the outlined function signature. - Function *WrapperFn = + Function *OutlinedFn = dyn_cast(TeamsForkCall->getArgOperand(2)->stripPointerCasts()); - ASSERT_NE(WrapperFn, nullptr); - EXPECT_FALSE(WrapperFn->isDeclaration()); - EXPECT_TRUE(WrapperFn->arg_size() >= 3); - EXPECT_EQ(WrapperFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid - EXPECT_EQ(WrapperFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid - EXPECT_EQ(WrapperFn->getArg(2)->getType(), + ASSERT_NE(OutlinedFn, nullptr); + EXPECT_FALSE(OutlinedFn->isDeclaration()); + EXPECT_TRUE(OutlinedFn->arg_size() >= 3); + EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid + EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid + EXPECT_EQ(OutlinedFn->getArg(2)->getType(), Builder.getPtrTy()); // captured args // Check for TruncInst and ICmpInst in the outlined function. 
- inst_range Instructions = instructions(WrapperFn); - auto OutlinedFnInst = find_if( - Instructions, [](Instruction &Inst) { return isa(&Inst); }); - ASSERT_NE(OutlinedFnInst, Instructions.end()); - CallInst *OutlinedFnCI = dyn_cast(&*OutlinedFnInst); - ASSERT_NE(OutlinedFnCI, nullptr); - Function *OutlinedFn = OutlinedFnCI->getCalledFunction(); - EXPECT_TRUE(any_of(instructions(OutlinedFn), [](Instruction &inst) { return isa(&inst); })); EXPECT_TRUE(any_of(instructions(OutlinedFn), >From 4b71558a1936983e1eeebfee98de6b4d8f1062cc Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:26:57 -0500 Subject: [PATCH 3/6] Remove unintentional include for InstIterator.h --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index a5a73bcc10c48e3..f62d244a2dc4c68 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -35,7 +35,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" >From 7c95d29b677c6107f81b0c26c139a34475a6fe81 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Mon, 2 Oct 2023 09:50:26 -0500 Subject: [PATCH 4/6] Fix insertpoint after createTask --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f62d244a2dc4c68..5ed2a345a14dd04 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1566,7 +1566,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); OutlineInfo OI; OI.EntryBB = TaskAllocaBB; @@ -1767,6 +1766,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, }; addOutlineInfo(std::move(OI)); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); return Builder.saveIP(); } >From 1bef65fbc9c4de9ee55234063e6895c11b3f68ea Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Wed, 4 Oct 2023 16:27:31 -0500 Subject: [PATCH 5/6] Remove wrapper function from MLIR tests. 
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 ++-- mlir/test/Target/LLVMIR/openmp-teams.mlir | 22 ++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8246a55a9887e8a..f647055a9733de9 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5822,8 +5822,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, assert(StaleCI && "Error while outlining - no CallInst user found for the " "outlined function."); Builder.SetInsertPoint(StaleCI); - SmallVector Args = {Ident, Builder.getInt32(StaleCI->arg_size()), - &OutlinedFn}; + SmallVector Args = { + Ident, Builder.getInt32(StaleCI->arg_size() - 2), &OutlinedFn}; if (HasShared) Args.push_back(StaleCI->getArgOperand(2)); Builder.CreateCall(getOrCreateRuntimeFunctionPtr( diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir index 16457e88774b93a..18fc2bb5a3c61b2 100644 --- a/mlir/test/Target/LLVMIR/openmp-teams.mlir +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -3,7 +3,7 @@ llvm.func @foo() // CHECK-LABEL: @omp_teams_simple -// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[WRAPPER_FN:.+]]) +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr [[OUTLINED_FN:.+]]) // CHECK: ret void llvm.func @omp_teams_simple() { omp.teams { @@ -13,12 +13,9 @@ llvm.func @omp_teams_simple() { llvm.return } -// CHECK: define internal void @[[OUTLINED_FN:.+]]() +// CHECK: define internal void @[[OUTLINED_FN:.+]](ptr {{.+}}, ptr {{.+}}) // CHECK: call void @foo() // CHECK: ret void -// CHECK: define void [[WRAPPER_FN]](ptr {{.+}}, ptr {{.+}}) -// CHECK: call void @[[OUTLINED_FN]] -// CHECK: ret void // ----- @@ -30,7 +27,7 @@ llvm.func @foo(i32) -> () // CHECK: br // CHECK: [[GEP:%.+]] = getelementptr { i32 }, ptr [[STRUCT_ARG]], i32 0, i32 0 // CHECK: store i32 [[ARG0]], ptr [[GEP]] -// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[WRAPPER_FN:.+]], ptr [[STRUCT_ARG]]) +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[OUTLINED_FN:.+]], ptr [[STRUCT_ARG]]) // CHECK: ret void llvm.func @omp_teams_shared_simple(%arg0: i32) { omp.teams { @@ -40,14 +37,11 @@ llvm.func @omp_teams_shared_simple(%arg0: i32) { llvm.return } -// CHECK: define internal void [[OUTLINED_FN:@.+]](ptr [[STRUCT_ARG:%.+]]) +// CHECK: define internal void [[OUTLINED_FN:@.+]](ptr {{.+}}, ptr {{.+}}, ptr [[STRUCT_ARG:%.+]]) // CHECK: [[GEP:%.+]] = getelementptr { i32 }, ptr [[STRUCT_ARG]], i32 0, i32 0 // CHECK: [[LOAD_GEP:%.+]] = load i32, ptr [[GEP]] // CHECK: call void @foo(i32 [[LOAD_GEP]]) // CHECK: ret void -// CHECK: define void [[WRAPPER_FN]](ptr {{.+}}, ptr {{.+}}, ptr [[STRUCT_ARG:.+]]) -// CHECK: call void [[OUTLINED_FN]](ptr [[STRUCT_ARG]]) -// CHECK: ret void // ----- @@ -81,7 +75,7 @@ llvm.func @bar() // CHECK: store i32 [[LOADED]], ptr [[LOADED_PTR]] // Runtime call. -// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[WRAPPER_FN:@.+]], ptr [[STRUCT_ARG]]) +// CHECK: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[OUTLINED_FN:@.+]], ptr [[STRUCT_ARG]]) // CHECK: br label // CHECK: call void @bar() // CHECK: ret void @@ -105,7 +99,7 @@ llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %a } // Check the outlined function. 
-// CHECK: define internal void [[OUTLINED_FN:@.+]](ptr [[DATA:%.+]])
+// CHECK: define internal void [[OUTLINED_FN:@.+]](ptr {{.+}}, ptr {{.+}}, ptr [[DATA:%.+]])
 // CHECK: [[CONDITION_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]]
 // CHECK: [[CONDITION:%.+]] = load i1, ptr [[CONDITION_PTR]]
 // CHECK: [[ARG0_PTR:%.+]] = getelementptr {{.+}}, ptr [[DATA]], i32 0, i32 1
@@ -130,7 +124,3 @@ llvm.func @omp_teams_branching_shared(%condition: i1, %arg0: i32, %arg1: f32, %a
 // CHECK-NEXT: br label
 // CHECK: ret void
 
-// Check the wrapper function
-// CHECK: define void [[WRAPPER_FN]](ptr {{.+}}, ptr {{.+}}, ptr [[DATA:%.+]])
-// CHECK: call void [[OUTLINED_FN]](ptr [[DATA]])
-// CHECK: ret void

>From 1ef1690d7fcdb8ca7b9ef6f0a6ed1ae8756203dd Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay
Date: Thu, 5 Oct 2023 13:54:04 -0500
Subject: [PATCH 6/6] Address comments

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index f647055a9733de9..e985b9551c60ad4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -354,26 +354,24 @@ Value *createFakeIntVal(IRBuilder<> &Builder,
       Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr");
   ToBeDeleted.push(FakeValAddr);
 
-  if (AsPtr)
+  if (AsPtr) {
     FakeVal = FakeValAddr;
-  else {
+  } else {
     FakeVal =
         Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val");
     ToBeDeleted.push(FakeVal);
   }
 
-  // We only need TIDAddr and ZeroAddr for modeling purposes to get the
-  // associated arguments in the outlined function, so we delete them later.
-
-  // Fake use of TID
+  // Generate a fake use of this value
   Builder.restoreIP(InnerAllocaIP);
   Instruction *UseFakeVal;
-  if (AsPtr)
+  if (AsPtr) {
     UseFakeVal =
         Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use");
-  else
+  } else {
     UseFakeVal =
         cast<Instruction>(Builder.CreateAdd(FakeVal, Builder.getInt32(10)));
+  }
   ToBeDeleted.push(UseFakeVal);
   return FakeVal;
 }
@@ -5758,7 +5756,6 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
     BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry");
     Builder.SetInsertPoint(BodyBB, BodyBB->begin());
   }
-  InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin());
 
   // The current basic block is split into four basic blocks. After outlining,
   // they will be mapped as follows:
@@ -5794,6 +5791,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
 
   // Insert fake values for global tid and bound tid.
   std::stack<Instruction *> ToBeDeleted;
+  InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin());
   OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
   OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));

From lldb-commits at lists.llvm.org  Thu Oct  5 12:57:21 2023
From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits)
Date: Thu, 05 Oct 2023 12:57:21 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][NFCI] Remove use of ConstString from FilterRule in StructuredDataDarwinLog (PR #68347)
Message-ID: 

https://github.com/bulbazord created https://github.com/llvm/llvm-project/pull/68347

There are only ever 2 FilterRules and their operations are either "regex" or "match". This does not benefit from deduplication since the strings have static lifetime and we can just compare StringRefs pointing to them.
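To make the pattern concrete, here is a minimal standalone sketch (illustrative only; the `lookupOperation` helper and its table are invented for this example and are not part of the patch):

```cpp
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>

// The operation names have static storage duration, so a StringRef into
// them stays valid for the whole program; no interning is required.
static constexpr llvm::StringLiteral RegexOp("regex");
static constexpr llvm::StringLiteral MatchOp("match");

// Hypothetical lookup: StringMap hashes and compares the characters
// themselves, so plain StringRef keys work directly.
static int lookupOperation(llvm::StringRef Op) {
  static const llvm::StringMap<int> Ops = {{RegexOp, 0}, {MatchOp, 1}};
  auto It = Ops.find(Op);
  return It == Ops.end() ? -1 : It->getValue();
}

int main() {
  assert(lookupOperation("regex") == 0);
  assert(lookupOperation("match") == 1);
  assert(lookupOperation("glob") == -1);
  return 0;
}
```

With only two fixed keys, value-based comparison is cheap, which is why the uniquing that ConstString provides adds no value here.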
This is also not on a fast path, so it doesn't really benefit from the pointer comparisons of ConstStrings. >From 2c8ae3c5d1ff8b841255a79f7f64f81dd2bf9df1 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Thu, 5 Oct 2023 12:52:10 -0700 Subject: [PATCH] [lldb][NFCI] Remove use of ConstString from FilterRule in StructuredDataDarwinLog There are only ever 2 FilterRules and their operations are either "regex" or "match". This does not benefit from deduplication since the strings have static lifetime and we can just compare StringRefs pointing to them. This is also not on a fast path, so it doesn't really benefit from the pointer comparisons of ConstStrings. --- .../DarwinLog/StructuredDataDarwinLog.cpp | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp index 61e04900da342d2..f8a8df84ca37f29 100644 --- a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp +++ b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp @@ -32,6 +32,8 @@ #include "lldb/Utility/Log.h" #include "lldb/Utility/RegularExpression.h" +#include "llvm/ADT/StringMap.h" + #define DARWIN_LOG_TYPE_VALUE "DarwinLog" using namespace lldb; @@ -183,21 +185,20 @@ class FilterRule { std::function; - static void RegisterOperation(ConstString operation, + static void RegisterOperation(llvm::StringRef operation, const OperationCreationFunc &creation_func) { GetCreationFuncMap().insert(std::make_pair(operation, creation_func)); } static FilterRuleSP CreateRule(bool match_accepts, size_t attribute, - ConstString operation, + llvm::StringRef operation, const std::string &op_arg, Status &error) { // Find the creation func for this type of filter rule. auto map = GetCreationFuncMap(); auto find_it = map.find(operation); if (find_it == map.end()) { - error.SetErrorStringWithFormat("unknown filter operation \"" - "%s\"", - operation.GetCString()); + error.SetErrorStringWithFormatv("unknown filter operation \"{0}\"", + operation); return FilterRuleSP(); } @@ -217,7 +218,7 @@ class FilterRule { dict_p->AddStringItem("attribute", s_filter_attributes[m_attribute_index]); // Indicate the type of the rule. - dict_p->AddStringItem("type", GetOperationType().GetCString()); + dict_p->AddStringItem("type", GetOperationType()); // Let the rule add its own specific details here. DoSerialization(*dict_p); @@ -227,10 +228,10 @@ class FilterRule { virtual void Dump(Stream &stream) const = 0; - ConstString GetOperationType() const { return m_operation; } + llvm::StringRef GetOperationType() const { return m_operation; } protected: - FilterRule(bool accept, size_t attribute_index, ConstString operation) + FilterRule(bool accept, size_t attribute_index, llvm::StringRef operation) : m_accept(accept), m_attribute_index(attribute_index), m_operation(operation) {} @@ -243,7 +244,7 @@ class FilterRule { } private: - using CreationFuncMap = std::map; + using CreationFuncMap = llvm::StringMap; static CreationFuncMap &GetCreationFuncMap() { static CreationFuncMap s_map; @@ -252,7 +253,8 @@ class FilterRule { const bool m_accept; const size_t m_attribute_index; - const ConstString m_operation; + // The lifetime of m_operation should be static. 
+ const llvm::StringRef m_operation; }; using FilterRules = std::vector; @@ -296,8 +298,8 @@ class RegexFilterRule : public FilterRule { return FilterRuleSP(new RegexFilterRule(accept, attribute_index, op_arg)); } - static ConstString StaticGetOperation() { - static ConstString s_operation("regex"); + static llvm::StringRef StaticGetOperation() { + static constexpr llvm::StringLiteral s_operation("regex"); return s_operation; } @@ -341,8 +343,8 @@ class ExactMatchFilterRule : public FilterRule { new ExactMatchFilterRule(accept, attribute_index, op_arg)); } - static ConstString StaticGetOperation() { - static ConstString s_operation("match"); + static llvm::StringRef StaticGetOperation() { + static constexpr llvm::StringLiteral s_operation("match"); return s_operation; } @@ -701,7 +703,7 @@ class EnableOptions : public Options { // add filter spec auto rule_sp = FilterRule::CreateRule( - accept, attribute_index, ConstString(operation), + accept, attribute_index, operation, std::string(rule_text.substr(operation_end_pos + 1)), error); if (rule_sp && error.Success()) From lldb-commits at lists.llvm.org Thu Oct 5 12:58:25 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 12:58:25 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][NFCI] Remove use of ConstString from FilterRule in StructuredDataDarwinLog (PR #68347) In-Reply-To: Message-ID: <651f1561.630a0220.6a6c2.60c4@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes There are only ever 2 FilterRules and their operations are either "regex" or "match". This does not benefit from deduplication since the strings have static lifetime and we can just compare StringRefs pointing to them. This is also not on a fast path, so it doesn't really benefit from the pointer comparisons of ConstStrings. --- Full diff: https://github.com/llvm/llvm-project/pull/68347.diff 1 Files Affected: - (modified) lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp (+17-15) ``````````diff diff --git a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp index 61e04900da342d2..f8a8df84ca37f29 100644 --- a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp +++ b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp @@ -32,6 +32,8 @@ #include "lldb/Utility/Log.h" #include "lldb/Utility/RegularExpression.h" +#include "llvm/ADT/StringMap.h" + #define DARWIN_LOG_TYPE_VALUE "DarwinLog" using namespace lldb; @@ -183,21 +185,20 @@ class FilterRule { std::function; - static void RegisterOperation(ConstString operation, + static void RegisterOperation(llvm::StringRef operation, const OperationCreationFunc &creation_func) { GetCreationFuncMap().insert(std::make_pair(operation, creation_func)); } static FilterRuleSP CreateRule(bool match_accepts, size_t attribute, - ConstString operation, + llvm::StringRef operation, const std::string &op_arg, Status &error) { // Find the creation func for this type of filter rule. auto map = GetCreationFuncMap(); auto find_it = map.find(operation); if (find_it == map.end()) { - error.SetErrorStringWithFormat("unknown filter operation \"" - "%s\"", - operation.GetCString()); + error.SetErrorStringWithFormatv("unknown filter operation \"{0}\"", + operation); return FilterRuleSP(); } @@ -217,7 +218,7 @@ class FilterRule { dict_p->AddStringItem("attribute", s_filter_attributes[m_attribute_index]); // Indicate the type of the rule. - dict_p->AddStringItem("type", GetOperationType().GetCString()); + dict_p->AddStringItem("type", GetOperationType()); // Let the rule add its own specific details here. DoSerialization(*dict_p); @@ -227,10 +228,10 @@ class FilterRule { virtual void Dump(Stream &stream) const = 0; - ConstString GetOperationType() const { return m_operation; } + llvm::StringRef GetOperationType() const { return m_operation; } protected: - FilterRule(bool accept, size_t attribute_index, ConstString operation) + FilterRule(bool accept, size_t attribute_index, llvm::StringRef operation) : m_accept(accept), m_attribute_index(attribute_index), m_operation(operation) {} @@ -243,7 +244,7 @@ class FilterRule { } private: - using CreationFuncMap = std::map; + using CreationFuncMap = llvm::StringMap; static CreationFuncMap &GetCreationFuncMap() { static CreationFuncMap s_map; @@ -252,7 +253,8 @@ class FilterRule { const bool m_accept; const size_t m_attribute_index; - const ConstString m_operation; + // The lifetime of m_operation should be static. 
+  const llvm::StringRef m_operation;
 };
 
 using FilterRules = std::vector<FilterRuleSP>;
@@ -296,8 +298,8 @@ class RegexFilterRule : public FilterRule {
     return FilterRuleSP(new RegexFilterRule(accept, attribute_index, op_arg));
   }
 
-  static ConstString StaticGetOperation() {
-    static ConstString s_operation("regex");
+  static llvm::StringRef StaticGetOperation() {
+    static constexpr llvm::StringLiteral s_operation("regex");
     return s_operation;
   }
 
@@ -341,8 +343,8 @@ class ExactMatchFilterRule : public FilterRule {
         new ExactMatchFilterRule(accept, attribute_index, op_arg));
   }
 
-  static ConstString StaticGetOperation() {
-    static ConstString s_operation("match");
+  static llvm::StringRef StaticGetOperation() {
+    static constexpr llvm::StringLiteral s_operation("match");
     return s_operation;
   }
 
@@ -701,7 +703,7 @@ class EnableOptions : public Options {
 
     // add filter spec
     auto rule_sp = FilterRule::CreateRule(
-        accept, attribute_index, ConstString(operation),
+        accept, attribute_index, operation,
        std::string(rule_text.substr(operation_end_pos + 1)), error);
 
     if (rule_sp && error.Success())
``````````
https://github.com/llvm/llvm-project/pull/68347

From lldb-commits at lists.llvm.org  Thu Oct  5 13:16:48 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Thu, 05 Oct 2023 13:16:48 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][NFCI] Remove use of ConstString from FilterRule in StructuredDataDarwinLog (PR #68347)
In-Reply-To: 
Message-ID: <651f19b0.170a0220.17d5d.7761@mx.google.com>

https://github.com/walter-erquinigo approved this pull request.

https://github.com/llvm/llvm-project/pull/68347

From lldb-commits at lists.llvm.org  Thu Oct  5 14:34:33 2023
From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits)
Date: Thu, 05 Oct 2023 14:34:33 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013)
In-Reply-To: 
Message-ID: <651f2be9.a70a0220.81a9c.04a5@mx.google.com>

https://github.com/bulbazord approved this pull request.

The warnings look good to me, thanks for taking care of that. How does this look, @jimingham?

https://github.com/llvm/llvm-project/pull/68013

From lldb-commits at lists.llvm.org  Thu Oct  5 15:00:16 2023
From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits)
Date: Thu, 05 Oct 2023 15:00:16 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300)
In-Reply-To: 
Message-ID: <651f31f0.170a0220.92abc.7adc@mx.google.com>

https://github.com/adrian-prantl approved this pull request.

https://github.com/llvm/llvm-project/pull/68300

From lldb-commits at lists.llvm.org  Thu Oct  5 16:47:48 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Thu, 05 Oct 2023 16:47:48 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013)
In-Reply-To: 
Message-ID: <651f4b24.620a0220.56648.0cf7@mx.google.com>

jimingham wrote:

LGTM

https://github.com/llvm/llvm-project/pull/68013

From lldb-commits at lists.llvm.org  Thu Oct  5 17:17:44 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Thu, 05 Oct 2023 17:17:44 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013)
In-Reply-To: 
Message-ID: <651f5228.170a0220.69ec7.8beb@mx.google.com>

walter-erquinigo wrote:

Thanks!

https://github.com/llvm/llvm-project/pull/68013

From lldb-commits at lists.llvm.org  Thu Oct  5 17:17:52 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Thu, 05 Oct 2023 17:17:52 -0700 (PDT)
Subject: [Lldb-commits] [lldb] 87c6ff6 - [LLDB] Allow specifying a custom exports file (#68013)
Message-ID: <651f5230.630a0220.da186.792b@mx.google.com>

Author: Walter Erquinigo
Date: 2023-10-05T20:17:48-04:00
New Revision: 87c6ff6da82a1288ce5c80370d5d8cdd4c20220d

URL: https://github.com/llvm/llvm-project/commit/87c6ff6da82a1288ce5c80370d5d8cdd4c20220d
DIFF: https://github.com/llvm/llvm-project/commit/87c6ff6da82a1288ce5c80370d5d8cdd4c20220d.diff

LOG: [LLDB] Allow specifying a custom exports file (#68013)

LLDB has the cmake flag `LLDB_EXPORT_ALL_SYMBOLS`, which exports the lldb and lldb_private namespaces, as well as other symbols like python and lua (see `lldb/source/API/liblldb-private.exports`). However, not all symbols in lldb fall into these categories, and in order to get access to some symbols that live in plugin folders (like the DWARF parsing symbols), it's useful to be able to specify a custom exports file, giving more control to the developer using lldb as a library.
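By way of example, the intended downstream usage might look like this (a hypothetical sketch; the cache values and the file path are made up, and the flag itself is introduced below):

```cmake
# Hypothetical downstream configuration, not part of this commit:
# keep exporting private symbols, but control the exact list exported.
set(LLDB_EXPORT_ALL_SYMBOLS ON CACHE BOOL "" FORCE)
set(LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE
    "${CMAKE_SOURCE_DIR}/my-lldb.exports" CACHE PATH "" FORCE)
```

With both cache entries set, the `add_llvm_symbol_exports(liblldb ...)` call added below consumes `my-lldb.exports` instead of the checked-in `liblldb-private.exports`.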
This adds the new cmake flag `LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE`, which is used when `LLDB_EXPORT_ALL_SYMBOLS` is enabled to specify that custom exports file.

This is a follow-up of https://github.com/llvm/llvm-project/pull/67851

Added: 
    

Modified: 
    lldb/cmake/modules/LLDBConfig.cmake
    lldb/source/API/CMakeLists.txt

Removed: 
    

################################################################################
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index 380016ce48015fa..ce5e666a6f5e1ac 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -123,7 +123,10 @@ if(APPLE AND CMAKE_GENERATOR STREQUAL Xcode)
 endif()
 
 set(LLDB_EXPORT_ALL_SYMBOLS 0 CACHE BOOL
-  "Causes lldb to export all symbols when building liblldb.")
+  "Causes lldb to export some private symbols when building liblldb. See lldb/source/API/liblldb-private.exports for the full list of symbols that get exported.")
+
+set(LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE "" CACHE PATH
+  "When `LLDB_EXPORT_ALL_SYMBOLS` is enabled, this specifies the exports file to use when building liblldb.")
 
 if ((NOT MSVC) OR MSVC12)
   add_definitions( -DHAVE_ROUND )
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index 7cfa3aaafdae188..a574a461d4920ae 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -177,11 +177,18 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
       # from working on some systems but limits the liblldb size.
       MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb namespace")
       add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb.exports)
-    else()
-      # Don't use an explicit export. Instead, tell the linker to
-      # export all symbols.
+    elseif (NOT LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE)
+      # Don't use an explicit export. Instead, tell the linker to export all symbols.
       MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb and lldb_private namespaces")
+      MESSAGE(WARNING "Private LLDB symbols frequently change and no API stability is guaranteed. "
+              "Only the SB API is guaranteed to be stable.")
       add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb-private.exports)
+    else ()
+      MESSAGE("-- Symbols (liblldb): exporting all symbols specified in the exports "
+              " file '${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}'")
+      MESSAGE(WARNING "Private LLDB symbols frequently change and no API stability is guaranteed. 
" + "Only the SB API is guaranteed to be stable.") + add_llvm_symbol_exports(liblldb "${LLDB_EXPORT_ALL_SYMBOLS_EXPORTS_FILE}") endif() set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc") elseif (LLDB_EXPORT_ALL_SYMBOLS) From lldb-commits at lists.llvm.org Thu Oct 5 17:17:54 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Thu, 05 Oct 2023 17:17:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB] Allow specifying a custom exports file (PR #68013) In-Reply-To: Message-ID: <651f5232.050a0220.6b819.0e05@mx.google.com> https://github.com/walter-erquinigo closed https://github.com/llvm/llvm-project/pull/68013 From lldb-commits at lists.llvm.org Thu Oct 5 19:31:51 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Thu, 05 Oct 2023 19:31:51 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <651f7197.170a0220.fd786.184b@mx.google.com> https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 1/4] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. 
+// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. 
- // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. +// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. 
+ for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. + // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. 
-inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. + // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 
0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. + Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. 
+ Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. +void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! 
%isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. 
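+    // At this point the generated IR takes roughly the following form
+    // (the SSA names are illustrative only):
+    //   %isSorted = call @getSparseTensorReaderReadToBuffers0F32(
+    //       %reader, %dim2lvlBuffer, %lvl2dimBuffer, %xs, %ys)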
Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. +/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. 
- SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. - SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. 
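+    // The runtime expects the buffers in the order
+    //   (reader, lvlSizes, lvlTypes, dim2lvl, lvl2dim, posTp, crdTp, valTp),
+    // matching the updated signature of
+    // _mlir_ciface_newSparseTensorFromReader further below.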
+ Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). + if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. 
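+// Note that the argument convention now consistently passes dim2lvl before
+// lvl2dim; for example, generated code supplies the buffers in the order
+// (dimSizes, lvlSizes, lvlTypes, dim2lvl, lvl2dim, ...).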
void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). +// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl 
= MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private 
@_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = 
memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = 
memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = 
sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], %[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = 
sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func @sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// 
CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 2/4] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], 
%[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier
 // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref
 // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref
 // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier
@@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind
   return %1 : tensor<128xf64, #SparseVector>
 }

-// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0(
+// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0(
 // CHECK-SAME: %[[A1:.*0]]: memref,
 // CHECK-SAME: %[[A2:.*1]]: memref,
 // CHECK-SAME: %[[A3:.*2]]: memref,
@@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind
 // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier
 // CHECK-SAME: %[[A4:.*4]]: index,
 // CHECK-SAME: %[[A5:.*5]]: f64)
-// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]])
+// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]])
 // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3
 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> {
   %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo>

>From 5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 3/4] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //

-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@
   // Push backward maps from levels to dimensions.
   //

-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)

>From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 4/4] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> lvl2dim;
 };
-
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; From lldb-commits at lists.llvm.org Thu Oct 5 19:53:18 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Thu, 05 Oct 2023 19:53:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <651f769e.170a0220.d26e4.9389@mx.google.com> https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 1/8] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. 
@@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. 
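+  /// A typical call sequence (an illustrative sketch only):
+  ///   MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
+  ///   auto *coo = readCOO<double>(map, lvlSizes);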
+  template <typename V>
+  SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);
+
+  /// The implementation of `readCOO` that is templated over `IsPattern`
+  /// in order to perform LICM without needing to duplicate the source code.
   template <typename V, bool IsPattern>
-  void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
-                   SparseTensorCOO<V> *lvlCOO);
+  void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);

-  /// The internal implementation of `readToBuffers`. We template over
-  /// `IsPattern` in order to perform LICM without needing to duplicate the
-  /// source code.
+  /// The internal implementation of `readToBuffers`. We template over
+  /// `IsPattern` in order to perform LICM without needing to duplicate
+  /// the source code.
   template <typename C, typename V, bool IsPattern>
-  bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
-                         C *lvlCoordinates, V *values);
+  bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);

   /// Reads the MME header of a general sparse matrix of type real.
   void readMMEHeader();
@@ -288,96 +269,76 @@ class SparseTensorReader final {
   char line[kColWidth];
 };

+//===----------------------------------------------------------------------===//
+//
+// Reader class methods.
+//
 //===----------------------------------------------------------------------===//

 template <typename V>
-SparseTensorCOO<V> *SparseTensorReader::readCOO(uint64_t lvlRank,
-                                                const uint64_t *lvlSizes,
-                                                const uint64_t *dim2lvl) {
+SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
+                                                const uint64_t *lvlSizes) {
   assert(isValid() && "Attempt to readCOO() before readHeader()");
-  const uint64_t dimRank = getRank();
-  assert(lvlRank == dimRank && "Rank mismatch");
-  detail::PermutationRef d2l(dimRank, dim2lvl);
   // Prepare a COO object with the number of stored elems as initial capacity.
-  auto *lvlCOO = new SparseTensorCOO<V>(lvlRank, lvlSizes, getNSE());
-  // Do some manual LICM, to avoid assertions in the for-loop.
-  const bool IsPattern = isPattern();
-  if (IsPattern)
-    readCOOLoop<V, true>(lvlRank, d2l, lvlCOO);
+  auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
+  // Enter the reading loop.
+  if (isPattern())
+    readCOOLoop<V, true>(map, coo);
   else
-    readCOOLoop<V, false>(lvlRank, d2l, lvlCOO);
+    readCOOLoop<V, false>(map, coo);
   // Close the file and return the COO.
   closeFile();
-  return lvlCOO;
+  return coo;
 }

 template <typename V, bool IsPattern>
-void SparseTensorReader::readCOOLoop(uint64_t lvlRank,
-                                     detail::PermutationRef dim2lvl,
-                                     SparseTensorCOO<V> *lvlCOO) {
-  const uint64_t dimRank = getRank();
+void SparseTensorReader::readCOOLoop(const MapRef &map,
+                                     SparseTensorCOO<V> *coo) {
+  const uint64_t dimRank = map.getDimRank();
+  const uint64_t lvlRank = map.getLvlRank();
+  assert(dimRank == getRank());
   std::vector<uint64_t> dimCoords(dimRank);
   std::vector<uint64_t> lvlCoords(lvlRank);
-  for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) {
-    // We inline `readElement` here in order to avoid redundant
-    // assertions, since they're guaranteed by the call to `isValid()`
-    // and the construction of `dimCoords` above.
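+  // Each iteration reads one (dimCoords, value) line from the file and
+  // pushes the dim-coordinates forward into lvl-coordinates before adding
+  // the element to the COO.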
+ for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A dim2lvl/lvl2dim map encoding class, with utility methods.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
+#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
+
+#include <cassert>
+
+#include <cinttypes>
+
+namespace mlir {
+namespace sparse_tensor {
+
+/// A class for capturing the sparse tensor type map with a compact encoding.
+///
+/// Currently, the following situations are supported:
+///   (1) map is an identity
+///   (2) map is a permutation
+///   (3) map has affine ops (restricted set)
+///
+/// The pushforward/backward operations are fast for (1) and (2) but
+/// incur some obvious overhead for situation (3).
+///
+class MapRef final {
+public:
+  MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);
+
+  //
+  // Push forward maps from dimensions to levels.
+  //
+
+  template <typename T> inline void pushforward(const T *in, T *out) const {
+    switch (kind) {
+    case MapKind::kIdentity:
+      for (uint64_t i = 0; i < dimRank; ++i)
+        out[i] = in[i]; // TODO: optimize with in == out ?
+      break;
+    case MapKind::kPermutation:
+      for (uint64_t i = 0; i < dimRank; ++i)
+        out[dim2lvl[i]] = in[i];
+      break;
+    case MapKind::kAffine:
+      assert(0 && "coming soon");
+      break;
+    }
+  }
+
+  //
+  // Push backward maps from levels to dimensions.
+  //
+
+  template <typename T> inline void pushbackward(const T *in, T *out) const {
+    switch (kind) {
+    case MapKind::kIdentity:
+      for (uint64_t i = 0; i < lvlRank; ++i)
+        out[i] = in[i];
+      break;
+    case MapKind::kPermutation:
+      for (uint64_t i = 0; i < lvlRank; ++i)
+        out[lvl2dim[i]] = in[i];
+      break;
+    case MapKind::kAffine:
+      assert(0 && "coming soon");
+      break;
+    }
+  }
+
+  uint64_t getDimRank() const { return dimRank; }
+  uint64_t getLvlRank() const { return lvlRank; }
+
+private:
+  enum class MapKind { kIdentity, kPermutation, kAffine };
+
+  bool isIdentity() const;
+  bool isPermutation() const;
+
+  MapKind kind;
+  const uint64_t dimRank;
+  const uint64_t lvlRank;
+  const uint64_t *const dim2lvl; // non-owning pointer
+  const uint64_t *const lvl2dim; // non-owning pointer
+};
+
+} // namespace sparse_tensor
+} // namespace mlir
+
+#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 28c28c28109c3c7..37ad3c1b042313c 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase;
 template <typename P, typename C, typename V>
 class SparseTensorEnumerator;
 
-namespace detail {
-
-/// Checks whether the `perm` array is a permutation of `[0 .. size)`.
-inline bool isPermutation(uint64_t size, const uint64_t *perm) {
-  assert(perm && "Got nullptr for permutation");
-  std::vector<bool> seen(size, false);
-  for (uint64_t i = 0; i < size; ++i) {
-    const uint64_t j = perm[i];
-    if (j >= size || seen[j])
-      return false;
-    seen[j] = true;
-  }
-  for (uint64_t i = 0; i < size; ++i)
-    if (!seen[i])
-      return false;
-  return true;
-}
-
-/// Wrapper around `isPermutation` to ensure consistent error messages.
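// (A small usage sketch for `MapRef`, an illustration rather than code from
// the patch. For a CSC-like 2-d map, dim2lvl = lvl2dim = [1, 0]:
//
//   const uint64_t d2l[2] = {1, 0}, l2d[2] = {1, 0};
//   MapRef map(/*dimRank=*/2, /*lvlRank=*/2, d2l, l2d); // kind = kPermutation
//   uint64_t dim[2] = {3, 7}, lvl[2], back[2];
//   map.pushforward(dim, lvl);   // lvl  == {7, 3}
//   map.pushbackward(lvl, back); // back == {3, 7}, the exact round trip
//
// which is precisely the per-element coordinate translation the reader
// loops in File.h now delegate to this class.)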
-inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. + // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 
0 : sh;
+    out.push_back(constantIndex(builder, loc, s));
+  }
+}
+
+Value sparse_tensor::genReader(OpBuilder &builder, Location loc,
+                               SparseTensorType stt, Value tensor,
+                               /*out*/ SmallVectorImpl<Value> &dimShapesValues,
+                               /*out*/ Value &dimSizesBuffer) {
+  // Construct the dimShapes buffer. The buffer contains the static size
+  // per dimension, or otherwise a zero for a dynamic size.
+  fillDimShape(builder, loc, stt, dimShapesValues);
+  Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues);
+  // Create the `CheckedSparseTensorReader`. This reader performs a
+  // consistency check on the static sizes, but accepts any size
+  // of each dimension with a dynamic size.
+  Type opaqueTp = getOpaquePointerType(builder);
+  Type eltTp = stt.getElementType();
+  Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp);
+  Value reader =
+      createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp,
+                     {tensor, dimShapesBuffer, valTp}, EmitCInterface::On)
+          .getResult(0);
+  // For static shapes, the shape buffer can be used right away. For dynamic
+  // shapes, use the information from the reader to construct a buffer that
+  // supplies the actual size for each dynamic dimension.
+  dimSizesBuffer = dimShapesBuffer;
+  if (stt.hasDynamicDimShape()) {
+    Type indexTp = builder.getIndexType();
+    auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp);
+    dimSizesBuffer =
+        createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp,
+                       reader, EmitCInterface::On)
+            .getResult(0);
+  }
+  return reader;
+}
+
+Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
+                                      SparseTensorType stt,
+                                      SmallVectorImpl<Value> &dimShapesValues,
+                                      Value dimSizesBuffer,
+                                      /*out*/ Value &dim2lvlBuffer,
+                                      /*out*/ Value &lvl2dimBuffer) {
+  const Dimension dimRank = stt.getDimRank();
+  const Level lvlRank = stt.getLvlRank();
+  // For an identity mapping, the dim2lvl and lvl2dim mappings are
+  // identical, as are dimSizes and lvlSizes, so buffers are reused
+  // as much as possible.
+  if (stt.isIdentity()) {
+    assert(dimRank == lvlRank);
+    SmallVector<Value> iotaValues;
+    iotaValues.reserve(lvlRank);
+    for (Level l = 0; l < lvlRank; l++)
+      iotaValues.push_back(constantIndex(builder, loc, l));
+    dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues);
+    return dimSizesBuffer;
+  }
+  // Otherwise, some code needs to be generated to set up the buffers.
+  // TODO: use the lvl2dim once available and deal with non-permutations!
+  const auto dimToLvl = stt.getDimToLvl();
+  assert(dimToLvl.isPermutation());
+  SmallVector<Value> dim2lvlValues(dimRank);
+  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> lvlSizesValues(lvlRank);
+  for (Level l = 0; l < lvlRank; l++) {
+    // The `d`th source variable occurs in the `l`th result position.
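// (For example, as an illustration only: with dimToLvl = (i, j) -> (j, i),
// level 0 reads dimension 1, so this loop fills dim2lvlValues = [1, 0] and
// lvl2dimValues = [1, 0], while lvlSizesValues[l] picks up either the static
// shape constant or the runtime size loaded for dimension d.)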
+ Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. +void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! 
%isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. 
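// (Illustration: for the CSC-like encoding exercised in the codegen tests
// below, both buffers hold [1, 0]; dim2lvl lets the runtime push dimension
// coordinates forward into level space while reading, and lvl2dim carries
// the inverse translation.)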
Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. +/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. 
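// (Illustration of the shape encoding used here: for a tensor<?x8xf32>,
// the dimShape constants are [0, 8], where zero marks a dynamic size, and
// the checked reader later supplies the actual runtime size for each zero
// entry.)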
- SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. - SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. 
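// (Note that dim2lvlBuffer precedes lvl2dimBuffer in the parameter list
// below, matching the reordered `_mlir_ciface_newSparseTensorFromReader`
// signature in the runtime.)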
+    Type opaqueTp = getOpaquePointerType(rewriter);
+    Type eltTp = stt.getElementType();
+    Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp);
     SmallVector<Value> params{
         reader,
         lvlSizesBuffer,
         genLvlTypesBuffer(rewriter, loc, stt),
-        lvlToDimBuffer,
-        dimToLvlBuffer,
+        dim2lvlBuffer,
+        lvl2dimBuffer,
         constantPosTypeEncoding(rewriter, loc, stt.getEncoding()),
         constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()),
         valTp};
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt
index 085d83634a702a8..c48af17b2d94bb7 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt
@@ -7,6 +7,7 @@
 #  that is reserved/intended for shared libraries only.
 add_mlir_library(MLIRSparseTensorRuntime
   File.cpp
+  MapRef.cpp
   NNZ.cpp
   Storage.cpp
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
new file mode 100644
index 000000000000000..ed458afeae746bc
--- /dev/null
+++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
@@ -0,0 +1,52 @@
+//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+
+#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
+
+mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l,
+                                    const uint64_t *l2d)
+    : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) {
+  assert(d2l && l2d);
+  // Determine the kind of mapping (and asserts on simple inference).
+  if (isIdentity()) {
+    kind = MapKind::kIdentity;
+    for (uint64_t i = 0; i < dimRank; i++)
+      assert(lvl2dim[i] == i);
+  } else if (isPermutation()) {
+    kind = MapKind::kPermutation;
+    for (uint64_t i = 0; i < dimRank; i++)
+      assert(lvl2dim[dim2lvl[i]] == i);
+  } else {
+    kind = MapKind::kAffine;
+  }
+}
+
+bool mlir::sparse_tensor::MapRef::isIdentity() const {
+  if (dimRank != lvlRank)
+    return false;
+  for (uint64_t i = 0; i < dimRank; i++) {
+    if (dim2lvl[i] != i)
+      return false;
+  }
+  return true;
+}
+
+bool mlir::sparse_tensor::MapRef::isPermutation() const {
+  if (dimRank != lvlRank)
+    return false;
+  std::vector<bool> seen(dimRank, false);
+  for (uint64_t i = 0; i < dimRank; i++) {
+    const uint64_t j = dim2lvl[i];
+    if (j >= dimRank || seen[j])
+      return false;
+    seen[j] = true;
+  }
+  return true;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 82cb6d3aeefa35f..5b910716c0f9e59 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -226,11 +226,7 @@ extern "C" {
 static_assert(std::is_same<index_type, uint64_t>::value,
               "Expected index_type == uint64_t");
 
-// TODO: this swiss-army-knife should be split up into separate functions
-// for each action, since the various actions don't agree on (1) whether
-// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl"
-// arguments are actually storage-levels vs target tensor-dimensions,
-// (3) whether all the arguments are actually used/required.
+// The Swiss-army-knife for sparse tensor creation.
void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). +// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl 
= MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private 
@_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = 
memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = 
memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = 
sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], %[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = 
sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func @sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// 
CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 2/8] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], 
%[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 3/8] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 4/8] clang=format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 5/8] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. 
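The boolean that readToBuffers returns is worth spelling out: the loop below flags the stream as unsorted as soon as one element's level coordinates compare lexicographically below the previous element's. A standalone sketch of that check, with hypothetical names and no dependence on the reader (not code from this patch):

#include <cstdint>
#include <vector>

// Returns true iff consecutive lvlRank-tuples in `coords` are in
// non-decreasing lexicographic order, mirroring the prevLvlCoords
// comparison in readToBuffersLoop.
bool isLexSorted(const std::vector<uint64_t> &coords, uint64_t lvlRank) {
  const uint64_t nse = coords.size() / lvlRank; // number of stored elements
  for (uint64_t n = 1; n < nse; ++n) {
    const uint64_t *prev = coords.data() + (n - 1) * lvlRank;
    const uint64_t *curr = coords.data() + n * lvlRank;
    for (uint64_t l = 0; l < lvlRank; ++l) {
      if (prev[l] == curr[l])
        continue; // tie at this level, compare the next one
      if (prev[l] > curr[l])
        return false; // first differing coordinate went down
      break; // strictly increasing here, tuple is in order
    }
  }
  return true;
}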
- template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. 
We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. +// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. 
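Both loop helpers are templated on IsPattern so that the pattern-versus-value decision is made once, at instantiation time, rather than on every element; that is the manual LICM the comments refer to. A minimal sketch of the idiom, assuming a hypothetical strtod-based value parser (illustrative only, not the patch's code):

#include <cstdint>
#include <cstdlib>

template <bool IsPattern>
inline double readValue(char **linePtr) {
  if constexpr (IsPattern)
    return 1.0; // pattern files store no explicit value
  else
    return std::strtod(*linePtr, linePtr); // parse the next value
}

template <bool IsPattern>
void readLoop(char *line, double *out, uint64_t nse) {
  char *p = line;
  for (uint64_t k = 0; k < nse; ++k)
    out[k] = readValue<IsPattern>(&p); // no per-iteration isPattern test
}

// Single runtime dispatch; each instantiation has a branch-free body.
void read(bool isPattern, char *line, double *out, uint64_t nse) {
  if (isPattern)
    readLoop<true>(line, out, nse);
  else
    readLoop<false>(line, out, nse);
}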
+ MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). 
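Before the class body below, a concrete reading of the pushforward contract helps: for the kPermutation case, out[dim2lvl[i]] = in[i], so dim2lvl says where each dimension coordinate lands in level space. A worked example, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t dim2lvl[3] = {1, 2, 0};
  const uint64_t in[3] = {7, 8, 9}; // dim coords (d0, d1, d2)
  uint64_t out[3];
  for (uint64_t i = 0; i < 3; ++i)
    out[dim2lvl[i]] = in[i]; // the kPermutation case of pushforward
  // Level coords are (d2, d0, d1) = (9, 7, 8).
  assert(out[0] == 9 && out[1] == 7 && out[2] == 8);
  return 0;
}

The pushbackward direction applies lvl2dim the same way, which is why the constructor can sanity-check one map against the other.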
+/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. + // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
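The TODO that follows suggests MapRef should eventually drive this translation as well. A purely hypothetical sketch of what that could look like, assuming a MapRef built from the src2trg map and its inverse (the helper name and coordinate layout are invented for illustration):

#include <cstdint>
#include <vector>

#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

// Translate all source coordinate tuples to target tuples via pushforward.
// Assumes `srcCoords` stores srcRank coordinates per element, contiguously.
void translateAll(const mlir::sparse_tensor::MapRef &map,
                  const std::vector<uint64_t> &srcCoords,
                  std::vector<uint64_t> &trgCoords) {
  const uint64_t srcRank = map.getDimRank();
  const uint64_t trgRank = map.getLvlRank();
  const uint64_t nse = srcCoords.size() / srcRank;
  trgCoords.resize(nse * trgRank);
  for (uint64_t k = 0; k < nse; ++k)
    map.pushforward(srcCoords.data() + k * srcRank,
                    trgCoords.data() + k * trgRank);
}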
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
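The convention here is that a static extent is passed through while a dynamic extent is encoded as zero, and the reader only rejects mismatches on the static ones. A hypothetical version of that acceptance test (not the runtime's actual code):

#include <cstdint>

// A zero in `dimShapes` means "dynamic": any file size is accepted there.
bool shapeMatches(const uint64_t *dimShapes, const uint64_t *fileSizes,
                  uint64_t dimRank) {
  for (uint64_t d = 0; d < dimRank; ++d)
    if (dimShapes[d] != 0 && dimShapes[d] != fileSizes[d])
      return false; // static extent mismatch
  return true;
}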
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
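The genReaderBuffers loop above derives both directions from one traversal: if level l reads from dimension d = dimToLvl.getDimPosition(l), then dim2lvl[d] = l and lvl2dim[l] = d. A worked inversion example for the position array {2, 0, 1} (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t lvlToDimPos[3] = {2, 0, 1}; // d = getDimPosition(l)
  uint64_t dim2lvl[3], lvl2dim[3];
  for (uint64_t l = 0; l < 3; ++l) {
    const uint64_t d = lvlToDimPos[l];
    dim2lvl[d] = l; // dimension d appears at level l
    lvl2dim[l] = d; // level l draws from dimension d
  }
  assert(dim2lvl[2] == 0 && dim2lvl[0] == 1 && dim2lvl[1] == 2);
  assert(lvl2dim[0] == 2 && lvl2dim[1] == 0 && lvl2dim[2] == 1);
  return 0;
}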
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
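Given the isIdentity and isPermutation checks above, a permutation map classifies as kPermutation, and the two directions invert each other. A test-style round-trip sketch against the MapRef header introduced earlier in this patch (illustrative, not part of the change):

#include <cassert>
#include <cstdint>

#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

int main() {
  const uint64_t d2l[3] = {1, 2, 0};
  const uint64_t l2d[3] = {2, 0, 1}; // inverse: l2d[d2l[i]] == i
  mlir::sparse_tensor::MapRef map(3, 3, d2l, l2d);
  const uint64_t dims[3] = {7, 8, 9};
  uint64_t lvls[3], back[3];
  map.pushforward(dims, lvls);  // lvls = {9, 7, 8}
  map.pushbackward(lvls, back); // recovers {7, 8, 9}
  for (uint64_t i = 0; i < 3; ++i)
    assert(back[i] == dims[i]);
  return 0;
}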
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 6/8] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // 
CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 7/8] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)

>From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 8/8] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> lvl2dim;
 };
 
-
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-level sparse/dense annotations. This data structure provides
 /// a bufferized form of a sparse tensor type. In contrast to generating
@@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase {
 //===----------------------------------------------------------------------===//
 
 template <typename P, typename I, typename V>
-class SparseTensorEnumerator final
-    : public SparseTensorEnumeratorBase<V> {
+class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
   using Base = SparseTensorEnumeratorBase<V>;
   using StorageImpl = SparseTensorStorage<P, I, V>;
 

From lldb-commits at lists.llvm.org  Thu Oct  5 19:56:18 2023
From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits)
Date: Thu, 05 Oct 2023 19:56:18 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360)
In-Reply-To: 
Message-ID: <651f7752.630a0220.4c3e8.7960@mx.google.com>

https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360

>From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 13:22:28 -0700
Subject: [PATCH 1/9] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader

This revision introduces a MapRef, which will support a future
generalization beyond permutations (e.g. block sparsity). This revision
also unifies the conversion/codegen paths for the sparse_tensor.new
operation from file (e.g. the readers). Note that more unification is
planned as well as general affine dim2lvl and lvl2dim (all marked with
TODOs).
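To make the new abstraction concrete before the diff: a dim2lvl map sends
dimension coordinates to storage-level coordinates, and lvl2dim is its
inverse. The standalone sketch below illustrates only the permutation case,
with hypothetical names (PermMap is not the class added here; the real
MapRef below also reserves room for affine maps):

#include <cstdint>
#include <vector>

// Illustrative only: a permutation-based dim <-> lvl coordinate map.
// dim2lvl[d] == l means dimension d is stored at level l; lvl2dim inverts it.
struct PermMap {
  std::vector<uint64_t> dim2lvl, lvl2dim;

  // dimCoords -> lvlCoords (the "pushforward" direction used when reading).
  void pushforward(const uint64_t *dimCoords, uint64_t *lvlCoords) const {
    for (uint64_t d = 0, e = dim2lvl.size(); d < e; d++)
      lvlCoords[dim2lvl[d]] = dimCoords[d];
  }

  // lvlCoords -> dimCoords (the inverse "pushbackward" direction).
  void pushbackward(const uint64_t *lvlCoords, uint64_t *dimCoords) const {
    for (uint64_t l = 0, e = lvl2dim.size(); l < e; l++)
      dimCoords[lvl2dim[l]] = lvlCoords[l];
  }
};

For example, with dim2lvl = {1, 0} (a transposition) the dimension
coordinates (i, j) push forward to the level coordinates (j, i).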
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 2/9] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // 
CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 3/9] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template <typename T> inline void pushforward(const T *in, T *out) const { + template <typename T> + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template <typename T> inline void pushbackward(const T *in, T *out) const { + template <typename T> + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 4/9] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector<uint64_t> lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template <typename P, typename C, typename V> -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase<V> { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> { using Base = SparseTensorEnumeratorBase<V>; using StorageImpl = SparseTensorStorage<P, C, V>; >From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 5/9] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (e.g. the readers). Note that more unification is planned, as well as general affine dim2lvl and lvl2dim maps (all marked with TODOs).
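For intuition before the detailed diffs: in the permutation case, the new `MapRef::pushforward` scatters dimension coordinates into level positions via `out[dim2lvl[i]] = in[i]`. The following minimal, self-contained sketch (hypothetical driver and sample data, not part of the patch, mirroring the logic that MapRef.h introduces below) shows the effect for a 2-d transposition:

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch of MapRef::pushforward for MapKind::kPermutation:
// the coordinate of dimension d lands at level position dim2lvl[d].
static void pushforward(const std::vector<uint64_t> &dim2lvl,
                        const std::vector<uint64_t> &dimCoords,
                        std::vector<uint64_t> &lvlCoords) {
  assert(dim2lvl.size() == dimCoords.size());
  for (uint64_t d = 0; d < dimCoords.size(); d++)
    lvlCoords[dim2lvl[d]] = dimCoords[d];
}

int main() {
  // Hypothetical example: dim2lvl = {1, 0} transposes the storage order,
  // so dimension coordinate (3, 7) is stored at level coordinate (7, 3).
  std::vector<uint64_t> dim2lvl = {1, 0};
  std::vector<uint64_t> dimCoords = {3, 7};
  std::vector<uint64_t> lvlCoords(2);
  pushforward(dim2lvl, dimCoords, lvlCoords);
  std::cout << lvlCoords[0] << ", " << lvlCoords[1] << "\n"; // prints: 7, 3
  return 0;
}

For the identity kind the map degenerates to a plain copy, which is why the patch reuses a single iota buffer for both dim2lvl and lvl2dim in that case.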
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
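+  // For example (an illustration, not generated code): for a
+  // tensor<?x8xf32> input, the dimShapes buffer built above holds
+  // {0, 8}, so the reader verifies that dimension 1 is exactly 8 while
+  // accepting whatever extent the file supplies for dimension 0.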
+  Type opaqueTp = getOpaquePointerType(builder);
+  Type eltTp = stt.getElementType();
+  Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp);
+  Value reader =
+      createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp,
+                     {tensor, dimShapesBuffer, valTp}, EmitCInterface::On)
+          .getResult(0);
+  // For static shapes, the shape buffer can be used right away. For dynamic
+  // shapes, use the information from the reader to construct a buffer that
+  // supplies the actual size for each dynamic dimension.
+  dimSizesBuffer = dimShapesBuffer;
+  if (stt.hasDynamicDimShape()) {
+    Type indexTp = builder.getIndexType();
+    auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp);
+    dimSizesBuffer =
+        createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp,
+                       reader, EmitCInterface::On)
+            .getResult(0);
+  }
+  return reader;
+}
+
+Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
+                                      SparseTensorType stt,
+                                      SmallVectorImpl<Value> &dimShapesValues,
+                                      Value dimSizesBuffer,
+                                      /*out*/ Value &dim2lvlBuffer,
+                                      /*out*/ Value &lvl2dimBuffer) {
+  const Dimension dimRank = stt.getDimRank();
+  const Level lvlRank = stt.getLvlRank();
+  // For an identity mapping, the dim2lvl and lvl2dim mappings are
+  // identical, as are dimSizes and lvlSizes, so buffers are reused
+  // as much as possible.
+  if (stt.isIdentity()) {
+    assert(dimRank == lvlRank);
+    SmallVector<Value> iotaValues;
+    iotaValues.reserve(lvlRank);
+    for (Level l = 0; l < lvlRank; l++)
+      iotaValues.push_back(constantIndex(builder, loc, l));
+    dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues);
+    return dimSizesBuffer;
+  }
+  // Otherwise, some code needs to be generated to set up the buffers.
+  // TODO: use the lvl2dim once available and deal with non-permutations!
+  const auto dimToLvl = stt.getDimToLvl();
+  assert(dimToLvl.isPermutation());
+  SmallVector<Value> dim2lvlValues(dimRank);
+  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> lvlSizesValues(lvlRank);
+  for (Level l = 0; l < lvlRank; l++) {
+    // The `d`th source variable occurs in the `l`th result position.
+    Dimension d = dimToLvl.getDimPosition(l);
+    Value lvl = constantIndex(builder, loc, l);
+    Value dim = constantIndex(builder, loc, d);
+    dim2lvlValues[d] = lvl;
+    lvl2dimValues[l] = dim;
+    if (stt.isDynamicDim(d))
+      lvlSizesValues[l] =
+          builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
+    else
+      lvlSizesValues[l] = dimShapesValues[d];
+  }
+  dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
+  lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
+  return allocaBuffer(builder, loc, lvlSizesValues);
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 8145446751b9938..08ea019d8224a73 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SparseTensor/IR/Enums.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
 #include "mlir/IR/Builders.h"
@@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor,
 Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor,
                                 Dimension dim);

+/// Populates the array with the dimension-shape of the given
+/// `SparseTensorType`, where dynamic sizes are represented by zero.
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
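+  // For example (illustrative): with dimRank == lvlRank == 2, a dim2lvl
+  // of {0, 1} infers kIdentity, {1, 0} infers kPermutation, and a
+  // non-bijective map such as {0, 0} falls through to kAffine.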
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates
 #define IMPL_ADDELT(VNAME, V) \
   void *_mlir_ciface_addElt##VNAME( \
       void *lvlCOO, StridedMemRefType<V, 0> *vref, \
@@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes(
   aliasIntoMemref(reader.getRank(), dimSizes, *out);
 }

-#define IMPL_GETNEXT(VNAME, V) \
-  void _mlir_ciface_getSparseTensorReaderNext##VNAME( \
-      void *p, StridedMemRefType<index_type, 1> *dimCoordsRef, \
-      StridedMemRefType<V, 0> *vref) { \
-    assert(p && vref); \
-    auto &reader = *static_cast<SparseTensorReader *>(p); \
-    ASSERT_NO_STRIDE(dimCoordsRef); \
-    const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \
-    index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \
-    V *value = MEMREF_GET_PAYLOAD(vref); \
-    *value = reader.readElement(dimRank, dimCoords); \
-  }
-MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT)
-#undef IMPL_GETNEXT
-
 #define IMPL_GETNEXT(VNAME, V, CNAME, C) \
   bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \
       void *p, StridedMemRefType<index_type, 1> *dim2lvlRef, \
+      StridedMemRefType<index_type, 1> *lvl2dimRef, \
       StridedMemRefType<C, 1> *cref, StridedMemRefType<V, 1> *vref) { \
     assert(p); \
     auto &reader = *static_cast<SparseTensorReader *>(p); \
+    ASSERT_NO_STRIDE(dim2lvlRef); \
+    ASSERT_NO_STRIDE(lvl2dimRef); \
     ASSERT_NO_STRIDE(cref); \
     ASSERT_NO_STRIDE(vref); \
-    ASSERT_NO_STRIDE(dim2lvlRef); \
+    const uint64_t dimRank = reader.getRank(); \
+    const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \
     const uint64_t cSize = MEMREF_GET_USIZE(cref); \
     const uint64_t vSize = MEMREF_GET_USIZE(vref); \
-    const uint64_t lvlRank = reader.getRank(); \
-    assert(vSize * lvlRank <= cSize); \
+    ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \
+    assert(cSize >= lvlRank * vSize); \
     assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \
-    ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \
+    (void)dimRank; \
     (void)cSize; \
     (void)vSize; \
-    (void)lvlRank; \
+    index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \
+    index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \
     C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \
     V *values = MEMREF_GET_PAYLOAD(vref); \
-    index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \
-    return reader.readToBuffers<C, V>(lvlRank, dim2lvl, lvlCoordinates, \
-                                      values); \
+    return reader.readToBuffers<C, V>(lvlRank, dim2lvl, lvl2dim, \
+                                      lvlCoordinates, values); \
   }
 MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT)
 #undef IMPL_GETNEXT
@@ -551,8 +533,8 @@
 void *_mlir_ciface_newSparseTensorFromReader(
     void *p, StridedMemRefType<index_type, 1> *lvlSizesRef,
     StridedMemRefType<DimLevelType, 1> *lvlTypesRef,
-    StridedMemRefType<index_type, 1> *lvl2dimRef,
-    StridedMemRefType<index_type, 1> *dim2lvlRef, OverheadType posTp,
+    StridedMemRefType<index_type, 1> *dim2lvlRef,
+    StridedMemRefType<index_type, 1> *lvl2dimRef, OverheadType posTp,
     OverheadType crdTp, PrimaryType valTp) {
   assert(p);
   SparseTensorReader &reader = *static_cast<SparseTensorReader *>(p);
@@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader(
   (void)dimRank;
   const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
   const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
-  const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef);
   const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef);
+  const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef);
 #define CASE(p, c, v, P, C, V) \
   if (posTp == OverheadType::p && crdTp == OverheadType::c && \
       valTp == PrimaryType::v) \
     return static_cast<void *>(reader.readSparseTensor<P, C, V>( \
-        lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl));
+        lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim));
 #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V)
 // Rewrite kIndex to kU64, to avoid introducing a
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 6/9] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // 
CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 7/9] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 8/9] clang=format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 19:55:25 -0700 Subject: [PATCH 9/9] ArrayRef --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ffb1a550957edb8..61fecdad3be9398 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc, Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapesValues, + ArrayRef dimShapesValues, Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 08ea019d8224a73..698b6c491a9aef7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, /// Generates code to set up the buffer parameters for a reader. Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// From lldb-commits at lists.llvm.org Thu Oct 5 21:08:34 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Thu, 05 Oct 2023 21:08:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) Message-ID: https://github.com/JDevlieghere created https://github.com/llvm/llvm-project/pull/68378 Add the ability to list all processes through the SB API. rdar://116188959 >From 8611bebd2b6cd4f6de797240c1cb184af71f384d Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 5 Oct 2023 21:07:03 -0700 Subject: [PATCH] [lldb] Expose SBPlatform::GetAllProcesses to the SB API Add the ability to list all processes through the SB API. 
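A typical use of the new call might look like the following sketch (illustrative only: it assumes an already-connected SBPlatform named `platform`, and relies on the SBProcessInfoList accessors GetSize and GetProcessInfoAtIndex that back the Python iteration support added below):

    lldb::SBError error;
    lldb::SBProcessInfoList infos = platform.GetAllProcesses(error);
    if (error.Success()) {
      for (uint32_t i = 0; i < infos.GetSize(); ++i) {
        lldb::SBProcessInfo info = infos.GetProcessInfoAtIndex(i);
        printf("%llu %s\n", (unsigned long long)info.GetProcessID(),
               info.GetName());
      }
    }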
rdar://116188959 --- lldb/bindings/headers.swig | 1 + .../interface/SBProcessInfoListExtensions.i | 13 ++++ lldb/bindings/interfaces.swig | 2 + lldb/include/lldb/API/LLDB.h | 1 + lldb/include/lldb/API/SBDefines.h | 1 + lldb/include/lldb/API/SBPlatform.h | 4 + lldb/include/lldb/API/SBProcessInfo.h | 1 + lldb/include/lldb/API/SBProcessInfoList.h | 46 ++++++++++++ lldb/include/lldb/Target/Platform.h | 4 +- lldb/include/lldb/Utility/ProcessInfo.h | 20 +++++ lldb/source/API/CMakeLists.txt | 1 + lldb/source/API/SBPlatform.cpp | 15 ++++ lldb/source/API/SBProcessInfoList.cpp | 73 +++++++++++++++++++ lldb/source/Target/Platform.cpp | 8 ++ .../TestPlatformListProcesses.py | 54 ++++++++++++++ 15 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 lldb/bindings/interface/SBProcessInfoListExtensions.i create mode 100644 lldb/include/lldb/API/SBProcessInfoList.h create mode 100644 lldb/source/API/SBProcessInfoList.cpp create mode 100644 lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index d392ed43d8c0c9e..b1d88726f754354 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -46,6 +46,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/bindings/interface/SBProcessInfoListExtensions.i b/lldb/bindings/interface/SBProcessInfoListExtensions.i new file mode 100644 index 000000000000000..42999846ef6a52f --- /dev/null +++ b/lldb/bindings/interface/SBProcessInfoListExtensions.i @@ -0,0 +1,13 @@ +%extend lldb::SBProcessInfoList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of process info in a lldb.SBProcessInfoListExtensions object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the process info in a lldb.SBProcessInfoListExtensions object.''' + return lldb_iter(self, 'GetSize', 'GetProcessInfoAtIndex') + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 306cfe683893271..373c2f6cf545cfb 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -122,6 +122,7 @@ %include "lldb/API/SBPlatform.h" %include "lldb/API/SBProcess.h" %include "lldb/API/SBProcessInfo.h" +%include "lldb/API/SBProcessInfoList.h" %include "lldb/API/SBQueue.h" %include "lldb/API/SBQueueItem.h" %include "lldb/API/SBReproducer.h" @@ -184,6 +185,7 @@ %include "./interface/SBModuleSpecExtensions.i" %include "./interface/SBModuleSpecListExtensions.i" %include "./interface/SBProcessExtensions.i" +%include "./interface/SBProcessInfoListExtensions.i" %include "./interface/SBQueueItemExtensions.i" %include "./interface/SBScriptObjectExtensions.i" %include "./interface/SBSectionExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index eacbbeafcf1cd86..f652d1bdb835b59 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -49,6 +49,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index ec5e940fdaf36fc..c6f01cc03f263c8 100644 --- 
a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -90,6 +90,7 @@ class LLDB_API SBPlatformConnectOptions; class LLDB_API SBPlatformShellCommand; class LLDB_API SBProcess; class LLDB_API SBProcessInfo; +class LLDB_API SBProcessInfoList; class LLDB_API SBQueue; class LLDB_API SBQueueItem; class LLDB_API SBReplayOptions; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index e0acc7003a54bc3..d63d2ed1eaba627 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -11,11 +11,13 @@ #include "lldb/API/SBDefines.h" #include "lldb/API/SBProcess.h" +#include "lldb/API/SBProcessInfoList.h" #include struct PlatformConnectOptions; struct PlatformShellCommand; +class ProcessInstanceInfoMatch; namespace lldb { @@ -154,6 +156,8 @@ class LLDB_API SBPlatform { SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, SBTarget &target, SBError &error); + SBProcessInfoList GetAllProcesses(SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcessInfo.h b/lldb/include/lldb/API/SBProcessInfo.h index 36fae9e842a6136..aec5924e4704a49 100644 --- a/lldb/include/lldb/API/SBProcessInfo.h +++ b/lldb/include/lldb/API/SBProcessInfo.h @@ -55,6 +55,7 @@ class LLDB_API SBProcessInfo { private: friend class SBProcess; + friend class SBProcessInfoList; lldb_private::ProcessInstanceInfo &ref(); diff --git a/lldb/include/lldb/API/SBProcessInfoList.h b/lldb/include/lldb/API/SBProcessInfoList.h new file mode 100644 index 000000000000000..7591fb3db713874 --- /dev/null +++ b/lldb/include/lldb/API/SBProcessInfoList.h @@ -0,0 +1,46 @@ +//===-- SBProcessInfoList.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBPROCESSINSTANCEINFOLIST_H +#define LLDB_API_SBPROCESSINSTANCEINFOLIST_H + +#include "lldb/API/SBDefines.h" + +#include + +namespace lldb_private { +class ProcessInfoList; +} // namespace lldb_private + +namespace lldb { + +class LLDB_API SBProcessInfoList { +public: + SBProcessInfoList(); + ~SBProcessInfoList(); + + SBProcessInfoList(const lldb::SBProcessInfoList &rhs); + + const lldb::SBProcessInfoList &operator=(const lldb::SBProcessInfoList &rhs); + + uint32_t GetSize() const; + + bool GetProcessInfoAtIndex(uint32_t idx, SBProcessInfo &info); + + void Clear(); + +private: + friend SBPlatform; + + SBProcessInfoList(const lldb_private::ProcessInfoList &impl); + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBPROCESSINSTANCEINFOLIST_H diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 08a58c80ef84779..129e4565d9ff993 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -407,6 +407,8 @@ class Platform : public PluginInterface { virtual uint32_t FindProcesses(const ProcessInstanceInfoMatch &match_info, ProcessInstanceInfoList &proc_infos); + ProcessInstanceInfoList GetAllProcesses(); + virtual bool GetProcessInfo(lldb::pid_t pid, ProcessInstanceInfo &proc_info); // Set a breakpoint on all functions that can end up creating a thread for @@ -883,7 +885,7 @@ class Platform : public PluginInterface { } virtual CompilerType GetSiginfoType(const llvm::Triple &triple); - + virtual Args GetExtraStartupCommands(); typedef std::function ProcessInstanceInfoList; +class ProcessInfoList { +public: + ProcessInfoList(const ProcessInstanceInfoList &list) : m_list(list) {} + + uint32_t GetSize() const { return m_list.size(); } + + bool GetProcessInfoAtIndex(uint32_t idx, ProcessInstanceInfo &info) { + if (idx < m_list.size()) { + info = m_list[idx]; + return true; + } + return false; + } + + void Clear() { return m_list.clear(); } + +private: + ProcessInstanceInfoList m_list; +}; + // ProcessInstanceInfoMatch // // A class to help matching one ProcessInstanceInfo to another. 
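The internal plumbing mirrors the SB surface: Platform::GetAllProcesses() runs an unfiltered FindProcesses() query, and the new lldb_private::ProcessInfoList is the copyable wrapper that SBProcessInfoList holds behind its unique_ptr. A small sketch of that layering, illustrative only and using just the types added above:

  #include "lldb/Target/Platform.h"
  #include "lldb/Utility/ProcessInfo.h"
  #include <cinttypes>
  #include <cstdio>

  // Sketch: walk the results of the new Platform::GetAllProcesses().
  static void DumpProcesses(lldb_private::Platform &platform) {
    lldb_private::ProcessInstanceInfoList raw = platform.GetAllProcesses();
    // ProcessInfoList copies the vector, so it stays valid independently.
    lldb_private::ProcessInfoList list(raw);
    lldb_private::ProcessInstanceInfo info;
    for (uint32_t i = 0; i < list.GetSize(); ++i)
      if (list.GetProcessInfoAtIndex(i, info))
        std::printf("pid %" PRIu64 "\n", (uint64_t)info.GetProcessID());
  }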
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..fb10d764be02c94 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -61,6 +61,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBPlatform.cpp SBProcess.cpp SBProcessInfo.cpp + SBProcessInfoList.cpp SBQueue.cpp SBQueueItem.cpp SBReproducer.cpp diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index c31848fe04ea72c..3623fd35bcdf13f 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -14,6 +14,7 @@ #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" @@ -599,6 +600,20 @@ SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, return {}; } +SBProcessInfoList SBPlatform::GetAllProcesses(SBError &error) { + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessInstanceInfoList list = platform_sp->GetAllProcesses(); + return SBProcessInfoList(list); + } + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/source/API/SBProcessInfoList.cpp b/lldb/source/API/SBProcessInfoList.cpp new file mode 100644 index 000000000000000..a4d1e353f27d90e --- /dev/null +++ b/lldb/source/API/SBProcessInfoList.cpp @@ -0,0 +1,73 @@ +//===-- SBProcessInfoList.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBProcessInfoList.h" +#include "Utils.h" +#include "lldb/API/SBProcessInfo.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/ProcessInfo.h" + +using namespace lldb; +using namespace lldb_private; + +SBProcessInfoList::SBProcessInfoList() = default; + +SBProcessInfoList::~SBProcessInfoList() = default; + +SBProcessInfoList::SBProcessInfoList(const ProcessInfoList &impl) + : m_opaque_up(std::make_unique(impl)) { + LLDB_INSTRUMENT_VA(this, impl); +} + +SBProcessInfoList::SBProcessInfoList(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +const lldb::SBProcessInfoList & +SBProcessInfoList::operator=(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +uint32_t SBProcessInfoList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + return m_opaque_up->GetSize(); + + return 0; +} + +void SBProcessInfoList::Clear() { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + m_opaque_up->Clear(); +} + +bool SBProcessInfoList::GetProcessInfoAtIndex(uint32_t idx, + SBProcessInfo &info) { + LLDB_INSTRUMENT_VA(this, idx, info); + + if (m_opaque_up) { + lldb_private::ProcessInstanceInfo process_instance_info; + if (m_opaque_up->GetProcessInfoAtIndex(idx, process_instance_info)) { + info.SetProcessInfo(process_instance_info); + return true; + } + } + + return false; +} diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c117339f07cc9df..c345e33136070f2 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info, return match_count; } +ProcessInstanceInfoList Platform::GetAllProcesses() { + ProcessInstanceInfoList processes; + ProcessInstanceInfoMatch match; + assert(match.MatchAllProcesses()); + FindProcesses(match, processes); + return processes; +} + Status Platform::LaunchProcess(ProcessLaunchInfo &launch_info) { Status error; Log *log = GetLog(LLDBLog::Platform); diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py new file mode 100644 index 000000000000000..be0e3f5f8c50112 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py @@ -0,0 +1,54 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformListProcesses(GDBRemoteTestBase): + @skipIfRemote + @skipIfWindows + def test_get_all_processes(self): + """Test listing processes""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + self.done = False + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + def qsProcessInfo(self): + if not self.done: + self.done = True + return "pid:95126;name:666f6f;" + return "E10" + + self.server.responder = MyPlatformResponder() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + 
lldb.SBPlatformConnectOptions(self.server.get_connect_url()) + ) + self.assertSuccess(error) + self.assertTrue(platform.IsConnected()) + + processes = platform.GetAllProcesses(error) + self.assertSuccess(error) + self.assertEqual(processes.GetSize(), 2) + self.assertEqual(len(processes), 2) + + process_info = lldb.SBProcessInfo() + processes.GetProcessInfoAtIndex(0, process_info) + self.assertEqual(process_info.GetProcessID(), 95117) + self.assertEqual(process_info.GetName(), "foo") + + processes.GetProcessInfoAtIndex(1, process_info) + self.assertEqual(process_info.GetProcessID(), 95126) + self.assertEqual(process_info.GetName(), "foo") + + platform.DisconnectRemote() From lldb-commits at lists.llvm.org Thu Oct 5 21:09:10 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Thu, 05 Oct 2023 21:09:10 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <651f8866.170a0220.f8c89.9aad@mx.google.com> https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 01/10] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. 
+// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. 
- // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. +// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. 
+ for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. + // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. 
-inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. + // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 
0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. + Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. 
+ Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. +void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! 
%isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. 
Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. +/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. 
- SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. - SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. 
+ Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). + if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. 
void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). +// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl 
= MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private 
@_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = 
memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = 
memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = 
sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], %[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = 
sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func @sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// 
CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 02/10] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], 
%[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 03/10] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 04/10] clang=format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 05/10] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. 
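// A rough sketch of the unified lowering that PATCH 05/10 above describes
// (the function names are the runtime entry points exercised by the tests
// in this patch; the suffix of ReadToBuffers encodes the coordinate/value
// types, the exact generated IR differs, and the memref ABI wrappers are
// omitted):
//
//   void *reader = createCheckedSparseTensorReader(file, dimShapes, valTp);
//   index_type *dimSizes = getSparseTensorReaderDimSizes(reader); // dynamic
//   uint64_t nse = getSparseTensorReaderNSE(reader);
//   bool sorted = getSparseTensorReaderReadToBuffers0F32(
//       reader, dim2lvl, lvl2dim, lvlCoords, values);
//   // sort lvlCoords/values if !sorted, then build the storage fields
//   delSparseTensorReader(reader);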
- template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. 
We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. +// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. 
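// The `IsPattern` templating used by readCOOLoop/readToBuffersLoop is the
// standard hoist-a-runtime-test idiom; as a generic sketch (not the
// verbatim reader code):
//
//   template <typename V, bool IsPattern>
//   void readLoop(SparseTensorReader &r /*, buffers... */) {
//     for (uint64_t k = 0, nse = r.getNSE(); k < nse; k++) {
//       // the pattern-vs-value decision resolves at compile time here
//     }
//   }
//   // one branch outside the loop instead of one per element:
//   r.isPattern() ? readLoop<V, true>(r) : readLoop<V, false>(r);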
+ MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). 
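// To make the doc comment above concrete (illustrative values, not taken
// from the tests): for the permutation (d0, d1) -> (d1, d0), that is,
// dim2lvl = {1, 0} and lvl2dim = {1, 0},
//
//   const uint64_t d2l[] = {1, 0}, l2d[] = {1, 0};
//   MapRef map(/*d=*/2, /*l=*/2, d2l, l2d);
//   uint64_t dims[] = {3, 7}, lvls[2];
//   map.pushforward(dims, lvls);   // lvls becomes {7, 3}
//   map.pushbackward(lvls, dims);  // dims back to {3, 7}
//
// pushforward scatters in[i] to out[dim2lvl[i]] and pushbackward scatters
// in[i] to out[lvl2dim[i]], so the two maps are mutual inverses for any
// permutation.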
+/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. + // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
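// As a worked example of the dimShapes encoding (an assumed shape, for
// illustration only): a tensor<?x8xf32> yields dimShapesValues = {0, 8};
// the reader checks the static extent 8 against the file header, and the
// getSparseTensorReaderDimSizes call below supplies the actual size of
// the dynamic dimension at run time.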
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
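// Note on the genReaderBuffers call below: for an identity dimToLvl it
// emits a single iota buffer ({0, 1} in the 2-d sparse_new_coo test
// above) that is passed as both dim2lvl and lvl2dim, while the permuted
// sparse_new_coo_permute_no case gets two buffers, each holding {1, 0},
// matching the CHECK lines around getSparseTensorReaderReadToBuffers0F32.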
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
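Both the removed TODO above and the added one below concern the same dimCoords-to-lvlCoords translation. As a point of reference, here is a minimal standalone sketch of such a pushforward under a permutation; the `dim2lvl[d] == l` convention is taken from `genReaderBuffers` earlier in this patch, while the function name and vector-based signature are illustrative rather than `MapRef`'s actual interface:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Maps dimension coordinates to level coordinates, assuming dim2lvl is a
// permutation where dim2lvl[d] == l means dimension d is stored at level l.
std::vector<uint64_t> pushforward(const std::vector<uint64_t> &dim2lvl,
                                  const std::vector<uint64_t> &dimCoords) {
  assert(dim2lvl.size() == dimCoords.size());
  std::vector<uint64_t> lvlCoords(dimCoords.size());
  for (uint64_t d = 0; d < dimCoords.size(); ++d)
    lvlCoords[dim2lvl[d]] = dimCoords[d]; // level dim2lvl[d] holds dim d
  return lvlCoords;
}

int main() {
  // Example: a 2-d tensor stored column-major, i.e. dim2lvl = {1, 0}.
  std::vector<uint64_t> dim2lvl = {1, 0};
  std::vector<uint64_t> dimCoords = {3, 7}; // (row, col)
  std::vector<uint64_t> lvlCoords = pushforward(dim2lvl, dimCoords);
  assert(lvlCoords[0] == 7 && lvlCoords[1] == 3); // (col, row)
  return 0;
}
```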
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 06/10] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 07/10] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 08/10] clang=format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 19:55:25 -0700 Subject: [PATCH 09/10] ArrayRef --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ffb1a550957edb8..61fecdad3be9398 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc, Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapesValues, + ArrayRef dimShapesValues, Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 08ea019d8224a73..698b6c491a9aef7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, /// Generates code to set up the buffer parameters for a reader. 
Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// >From 3e13b908253c1873295fb263537eee3bd40f186e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 21:08:24 -0700 Subject: [PATCH 10/10] sort_coo -> sort --- mlir/test/Dialect/SparseTensor/codegen.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index adefceba7379f99..84904227a636327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -699,7 +699,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] // CHECK: } // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] From lldb-commits at lists.llvm.org Thu Oct 5 21:09:42 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Thu, 05 Oct 2023 21:09:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8886.a70a0220.35e72.1e38@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes Add the ability to list all processes through the SB API. rdar://116188959 --- Full diff: https://github.com/llvm/llvm-project/pull/68378.diff 15 Files Affected: - (modified) lldb/bindings/headers.swig (+1) - (added) lldb/bindings/interface/SBProcessInfoListExtensions.i (+13) - (modified) lldb/bindings/interfaces.swig (+2) - (modified) lldb/include/lldb/API/LLDB.h (+1) - (modified) lldb/include/lldb/API/SBDefines.h (+1) - (modified) lldb/include/lldb/API/SBPlatform.h (+4) - (modified) lldb/include/lldb/API/SBProcessInfo.h (+1) - (added) lldb/include/lldb/API/SBProcessInfoList.h (+46) - (modified) lldb/include/lldb/Target/Platform.h (+3-1) - (modified) lldb/include/lldb/Utility/ProcessInfo.h (+20) - (modified) lldb/source/API/CMakeLists.txt (+1) - (modified) lldb/source/API/SBPlatform.cpp (+15) - (added) lldb/source/API/SBProcessInfoList.cpp (+73) - (modified) lldb/source/Target/Platform.cpp (+8) - (added) lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py (+54) ``````````diff diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index d392ed43d8c0c9e..b1d88726f754354 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -46,6 +46,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/bindings/interface/SBProcessInfoListExtensions.i b/lldb/bindings/interface/SBProcessInfoListExtensions.i new file mode 100644 index 000000000000000..42999846ef6a52f --- /dev/null +++ b/lldb/bindings/interface/SBProcessInfoListExtensions.i @@ -0,0 +1,13 @@ +%extend lldb::SBProcessInfoList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of process info in a lldb.SBProcessInfoListExtensions object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the process info in a lldb.SBProcessInfoListExtensions object.''' + return lldb_iter(self, 'GetSize', 'GetProcessInfoAtIndex') + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 306cfe683893271..373c2f6cf545cfb 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -122,6 +122,7 @@ %include "lldb/API/SBPlatform.h" %include "lldb/API/SBProcess.h" %include "lldb/API/SBProcessInfo.h" +%include "lldb/API/SBProcessInfoList.h" %include "lldb/API/SBQueue.h" %include "lldb/API/SBQueueItem.h" %include "lldb/API/SBReproducer.h" @@ -184,6 +185,7 @@ %include "./interface/SBModuleSpecExtensions.i" %include "./interface/SBModuleSpecListExtensions.i" %include "./interface/SBProcessExtensions.i" +%include "./interface/SBProcessInfoListExtensions.i" %include "./interface/SBQueueItemExtensions.i" %include "./interface/SBScriptObjectExtensions.i" %include "./interface/SBSectionExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index eacbbeafcf1cd86..f652d1bdb835b59 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -49,6 +49,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index ec5e940fdaf36fc..c6f01cc03f263c8 100644 --- 
a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -90,6 +90,7 @@ class LLDB_API SBPlatformConnectOptions; class LLDB_API SBPlatformShellCommand; class LLDB_API SBProcess; class LLDB_API SBProcessInfo; +class LLDB_API SBProcessInfoList; class LLDB_API SBQueue; class LLDB_API SBQueueItem; class LLDB_API SBReplayOptions; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index e0acc7003a54bc3..d63d2ed1eaba627 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -11,11 +11,13 @@ #include "lldb/API/SBDefines.h" #include "lldb/API/SBProcess.h" +#include "lldb/API/SBProcessInfoList.h" #include struct PlatformConnectOptions; struct PlatformShellCommand; +class ProcessInstanceInfoMatch; namespace lldb { @@ -154,6 +156,8 @@ class LLDB_API SBPlatform { SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, SBTarget &target, SBError &error); + SBProcessInfoList GetAllProcesses(SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcessInfo.h b/lldb/include/lldb/API/SBProcessInfo.h index 36fae9e842a6136..aec5924e4704a49 100644 --- a/lldb/include/lldb/API/SBProcessInfo.h +++ b/lldb/include/lldb/API/SBProcessInfo.h @@ -55,6 +55,7 @@ class LLDB_API SBProcessInfo { private: friend class SBProcess; + friend class SBProcessInfoList; lldb_private::ProcessInstanceInfo &ref(); diff --git a/lldb/include/lldb/API/SBProcessInfoList.h b/lldb/include/lldb/API/SBProcessInfoList.h new file mode 100644 index 000000000000000..7591fb3db713874 --- /dev/null +++ b/lldb/include/lldb/API/SBProcessInfoList.h @@ -0,0 +1,46 @@ +//===-- SBProcessInfoList.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBPROCESSINSTANCEINFOLIST_H +#define LLDB_API_SBPROCESSINSTANCEINFOLIST_H + +#include "lldb/API/SBDefines.h" + +#include + +namespace lldb_private { +class ProcessInfoList; +} // namespace lldb_private + +namespace lldb { + +class LLDB_API SBProcessInfoList { +public: + SBProcessInfoList(); + ~SBProcessInfoList(); + + SBProcessInfoList(const lldb::SBProcessInfoList &rhs); + + const lldb::SBProcessInfoList &operator=(const lldb::SBProcessInfoList &rhs); + + uint32_t GetSize() const; + + bool GetProcessInfoAtIndex(uint32_t idx, SBProcessInfo &info); + + void Clear(); + +private: + friend SBPlatform; + + SBProcessInfoList(const lldb_private::ProcessInfoList &impl); + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBPROCESSINSTANCEINFOLIST_H diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 08a58c80ef84779..129e4565d9ff993 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -407,6 +407,8 @@ class Platform : public PluginInterface { virtual uint32_t FindProcesses(const ProcessInstanceInfoMatch &match_info, ProcessInstanceInfoList &proc_infos); + ProcessInstanceInfoList GetAllProcesses(); + virtual bool GetProcessInfo(lldb::pid_t pid, ProcessInstanceInfo &proc_info); // Set a breakpoint on all functions that can end up creating a thread for @@ -883,7 +885,7 @@ class Platform : public PluginInterface { } virtual CompilerType GetSiginfoType(const llvm::Triple &triple); - + virtual Args GetExtraStartupCommands(); typedef std::function ProcessInstanceInfoList; +class ProcessInfoList { +public: + ProcessInfoList(const ProcessInstanceInfoList &list) : m_list(list) {} + + uint32_t GetSize() const { return m_list.size(); } + + bool GetProcessInfoAtIndex(uint32_t idx, ProcessInstanceInfo &info) { + if (idx < m_list.size()) { + info = m_list[idx]; + return true; + } + return false; + } + + void Clear() { return m_list.clear(); } + +private: + ProcessInstanceInfoList m_list; +}; + // ProcessInstanceInfoMatch // // A class to help matching one ProcessInstanceInfo to another. 
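+// Illustrative usage sketch (assumed caller code, not part of this diff):
+// ProcessInfoList copies the ProcessInstanceInfoList it wraps, and
+// GetProcessInfoAtIndex() bounds-checks the index, returning false instead
+// of faulting once `i` runs past GetSize(). Roughly:
+//
+//   lldb_private::ProcessInfoList list(platform.GetAllProcesses());
+//   lldb_private::ProcessInstanceInfo info;
+//   for (uint32_t i = 0; i < list.GetSize(); ++i)
+//     if (list.GetProcessInfoAtIndex(i, info))
+//       printf("pid = %" PRIu64 "\n", info.GetProcessID()); // <cinttypes>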
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..fb10d764be02c94 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -61,6 +61,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBPlatform.cpp SBProcess.cpp SBProcessInfo.cpp + SBProcessInfoList.cpp SBQueue.cpp SBQueueItem.cpp SBReproducer.cpp diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index c31848fe04ea72c..3623fd35bcdf13f 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -14,6 +14,7 @@ #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" @@ -599,6 +600,20 @@ SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, return {}; } +SBProcessInfoList SBPlatform::GetAllProcesses(SBError &error) { + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessInstanceInfoList list = platform_sp->GetAllProcesses(); + return SBProcessInfoList(list); + } + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/source/API/SBProcessInfoList.cpp b/lldb/source/API/SBProcessInfoList.cpp new file mode 100644 index 000000000000000..a4d1e353f27d90e --- /dev/null +++ b/lldb/source/API/SBProcessInfoList.cpp @@ -0,0 +1,73 @@ +//===-- SBProcessInfoList.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBProcessInfoList.h" +#include "Utils.h" +#include "lldb/API/SBProcessInfo.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/ProcessInfo.h" + +using namespace lldb; +using namespace lldb_private; + +SBProcessInfoList::SBProcessInfoList() = default; + +SBProcessInfoList::~SBProcessInfoList() = default; + +SBProcessInfoList::SBProcessInfoList(const ProcessInfoList &impl) + : m_opaque_up(std::make_unique(impl)) { + LLDB_INSTRUMENT_VA(this, impl); +} + +SBProcessInfoList::SBProcessInfoList(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +const lldb::SBProcessInfoList & +SBProcessInfoList::operator=(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +uint32_t SBProcessInfoList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + return m_opaque_up->GetSize(); + + return 0; +} + +void SBProcessInfoList::Clear() { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + m_opaque_up->Clear(); +} + +bool SBProcessInfoList::GetProcessInfoAtIndex(uint32_t idx, + SBProcessInfo &info) { + LLDB_INSTRUMENT_VA(this, idx, info); + + if (m_opaque_up) { + lldb_private::ProcessInstanceInfo process_instance_info; + if (m_opaque_up->GetProcessInfoAtIndex(idx, process_instance_info)) { + info.SetProcessInfo(process_instance_info); + return true; + } + } + + return false; +} diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c117339f07cc9df..c345e33136070f2 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info, return match_count; } +ProcessInstanceInfoList Platform::GetAllProcesses() { + ProcessInstanceInfoList processes; + ProcessInstanceInfoMatch match; + assert(match.MatchAllProcesses()); + FindProcesses(match, processes); + return processes; +} + Status Platform::LaunchProcess(ProcessLaunchInfo &launch_info) { Status error; Log *log = GetLog(LLDBLog::Platform); diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py new file mode 100644 index 000000000000000..be0e3f5f8c50112 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py @@ -0,0 +1,54 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformListProcesses(GDBRemoteTestBase): + @skipIfRemote + @skipIfWindows + def test_get_all_processes(self): + """Test listing processes""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + self.done = False + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + def qsProcessInfo(self): + if not self.done: + self.done = True + return "pid:95126;name:666f6f;" + return "E10" + + self.server.responder = MyPlatformResponder() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + 
lldb.SBPlatformConnectOptions(self.server.get_connect_url())
+        )
+        self.assertSuccess(error)
+        self.assertTrue(platform.IsConnected())
+
+        processes = platform.GetAllProcesses(error)
+        self.assertSuccess(error)
+        self.assertEqual(processes.GetSize(), 2)
+        self.assertEqual(len(processes), 2)
+
+        process_info = lldb.SBProcessInfo()
+        processes.GetProcessInfoAtIndex(0, process_info)
+        self.assertEqual(process_info.GetProcessID(), 95117)
+        self.assertEqual(process_info.GetName(), "foo")
+
+        processes.GetProcessInfoAtIndex(1, process_info)
+        self.assertEqual(process_info.GetProcessID(), 95126)
+        self.assertEqual(process_info.GetName(), "foo")
+
+        platform.DisconnectRemote()
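Beyond the gdb-client test above, a sketch of what driving the new API from C++ could look like. Illustrative only: the surrounding setup (a connected SBPlatform named platform) is assumed, and error handling is kept minimal.

#include "lldb/API/LLDB.h"
#include <cinttypes>
#include <cstdio>

static void ListRemoteProcesses(lldb::SBPlatform &platform) {
  lldb::SBError error;
  lldb::SBProcessInfoList processes = platform.GetAllProcesses(error);
  if (!error.Success())
    return; // "not connected" or "invalid platform" per SBPlatform.cpp above
  lldb::SBProcessInfo info;
  for (uint32_t i = 0; i < processes.GetSize(); ++i)
    if (processes.GetProcessInfoAtIndex(i, info))
      std::printf("%" PRIu64 " %s\n", info.GetProcessID(), info.GetName());
}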
https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Thu Oct 5 21:19:17 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Thu, 05 Oct 2023 21:19:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8ac5.a70a0220.96e00.1d0d@mx.google.com> https://github.com/medismailben edited https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Thu Oct 5 21:19:17 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Thu, 05 Oct 2023 21:19:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8ac5.170a0220.d75ae.9c87@mx.google.com> https://github.com/medismailben approved this pull request. LGTM with some comments https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Thu Oct 5 21:19:18 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Thu, 05 Oct 2023 21:19:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8ac6.a70a0220.3c641.1f77@mx.google.com> ================ @@ -0,0 +1,73 @@ +//===-- SBProcessInfoList.cpp -------------------------------------===// ---------------- medismailben wrote: same, missing some `---` https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Thu Oct 5 21:19:18 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Thu, 05 Oct 2023 21:19:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8ac6.170a0220.59995.9e8b@mx.google.com> ================ @@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info, return match_count; } +ProcessInstanceInfoList Platform::GetAllProcesses() { + ProcessInstanceInfoList processes; + ProcessInstanceInfoMatch match; + assert(match.MatchAllProcesses()); ---------------- medismailben wrote: why do we need this assert here ? https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Thu Oct 5 21:19:18 2023 From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits) Date: Thu, 05 Oct 2023 21:19:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8ac6.170a0220.fc8c6.996d@mx.google.com> ================ @@ -0,0 +1,73 @@ +//===-- SBProcessInfoList.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBProcessInfoList.h" +#include "Utils.h" ---------------- medismailben wrote: sort ? 
https://github.com/llvm/llvm-project/pull/68378

From lldb-commits at lists.llvm.org Thu Oct 5 21:19:18 2023
From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits)
Date: Thu, 05 Oct 2023 21:19:18 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378)
In-Reply-To:
Message-ID: <651f8ac6.050a0220.aaba.1d11@mx.google.com>

================
@@ -0,0 +1,46 @@
+//===-- SBProcessInfoList.h -----------------------------*- C++ -*-===//
----------------
medismailben wrote:

This is missing some `----`

https://github.com/llvm/llvm-project/pull/68378

From lldb-commits at lists.llvm.org Thu Oct 5 21:26:07 2023
From: lldb-commits at lists.llvm.org (Alex Langford via lldb-commits)
Date: Thu, 05 Oct 2023 21:26:07 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378)
In-Reply-To:
Message-ID: <651f8c5f.170a0220.14642.983e@mx.google.com>

https://github.com/bulbazord approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/68378

From lldb-commits at lists.llvm.org Thu Oct 5 21:35:18 2023
From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits)
Date: Thu, 05 Oct 2023 21:35:18 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378)
In-Reply-To:
Message-ID: <651f8e86.170a0220.697bd.9c5b@mx.google.com>

================
@@ -0,0 +1,73 @@
+//===-- SBProcessInfoList.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/API/SBProcessInfoList.h"
+#include "Utils.h"
----------------
JDevlieghere wrote:

This is sorted by clang-format: it wants the corresponding header first, then the rest sorted alphabetically. I'll move Utils.h down to avoid this weird-looking situation.

https://github.com/llvm/llvm-project/pull/68378

From lldb-commits at lists.llvm.org Thu Oct 5 21:35:57 2023
From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits)
Date: Thu, 05 Oct 2023 21:35:57 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378)
In-Reply-To:
Message-ID: <651f8ead.630a0220.fe91a.8364@mx.google.com>

================
@@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info,
   return match_count;
 }
 
+ProcessInstanceInfoList Platform::GetAllProcesses() {
+  ProcessInstanceInfoList processes;
+  ProcessInstanceInfoMatch match;
+  assert(match.MatchAllProcesses());
----------------
JDevlieghere wrote:

This ensures that the default-constructed `ProcessInstanceInfoMatch` actually matches all processes. This is the method that the implementation keys off of, so it seemed appropriate to have a sanity check here.
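In other words, the assert pins down a contract rather than validating input. A small sketch of that contract; the criterion-setting line is hypothetical API usage added for illustration, not something this patch touches:

ProcessInstanceInfoMatch match;    // default-constructed: no criteria set
assert(match.MatchAllProcesses()); // the invariant GetAllProcesses() keys off

// Hypothetical illustration: setting any criterion should defeat the
// "match all" default, so the assert would catch a changed default.
match.SetNameMatchType(NameMatch::Equals);
assert(!match.MatchAllProcesses());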
https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Thu Oct 5 21:36:38 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Thu, 05 Oct 2023 21:36:38 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <651f8ed6.170a0220.f0f44.936d@mx.google.com> https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/68378 >From 450be6e0e3e7b9b13f7674fbade9c5ce3bce9d97 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 5 Oct 2023 21:07:03 -0700 Subject: [PATCH] [lldb] Expose SBPlatform::GetAllProcesses to the SB API Add the ability to list all processes through the SB API. rdar://116188959 --- lldb/bindings/headers.swig | 1 + .../interface/SBProcessInfoListExtensions.i | 13 ++++ lldb/bindings/interfaces.swig | 2 + lldb/include/lldb/API/LLDB.h | 1 + lldb/include/lldb/API/SBDefines.h | 1 + lldb/include/lldb/API/SBPlatform.h | 4 + lldb/include/lldb/API/SBProcessInfo.h | 1 + lldb/include/lldb/API/SBProcessInfoList.h | 46 ++++++++++++ lldb/include/lldb/Target/Platform.h | 4 +- lldb/include/lldb/Utility/ProcessInfo.h | 20 +++++ lldb/source/API/CMakeLists.txt | 1 + lldb/source/API/SBPlatform.cpp | 15 ++++ lldb/source/API/SBProcessInfoList.cpp | 74 +++++++++++++++++++ lldb/source/Target/Platform.cpp | 8 ++ .../TestPlatformListProcesses.py | 54 ++++++++++++++ 15 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 lldb/bindings/interface/SBProcessInfoListExtensions.i create mode 100644 lldb/include/lldb/API/SBProcessInfoList.h create mode 100644 lldb/source/API/SBProcessInfoList.cpp create mode 100644 lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index d392ed43d8c0c9e..b1d88726f754354 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -46,6 +46,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/bindings/interface/SBProcessInfoListExtensions.i b/lldb/bindings/interface/SBProcessInfoListExtensions.i new file mode 100644 index 000000000000000..42999846ef6a52f --- /dev/null +++ b/lldb/bindings/interface/SBProcessInfoListExtensions.i @@ -0,0 +1,13 @@ +%extend lldb::SBProcessInfoList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of process info in a lldb.SBProcessInfoListExtensions object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the process info in a lldb.SBProcessInfoListExtensions object.''' + return lldb_iter(self, 'GetSize', 'GetProcessInfoAtIndex') + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 306cfe683893271..373c2f6cf545cfb 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -122,6 +122,7 @@ %include "lldb/API/SBPlatform.h" %include "lldb/API/SBProcess.h" %include "lldb/API/SBProcessInfo.h" +%include "lldb/API/SBProcessInfoList.h" %include "lldb/API/SBQueue.h" %include "lldb/API/SBQueueItem.h" %include "lldb/API/SBReproducer.h" @@ -184,6 +185,7 @@ %include "./interface/SBModuleSpecExtensions.i" %include "./interface/SBModuleSpecListExtensions.i" %include "./interface/SBProcessExtensions.i" +%include "./interface/SBProcessInfoListExtensions.i" 
%include "./interface/SBQueueItemExtensions.i" %include "./interface/SBScriptObjectExtensions.i" %include "./interface/SBSectionExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index eacbbeafcf1cd86..f652d1bdb835b59 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -49,6 +49,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index ec5e940fdaf36fc..c6f01cc03f263c8 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -90,6 +90,7 @@ class LLDB_API SBPlatformConnectOptions; class LLDB_API SBPlatformShellCommand; class LLDB_API SBProcess; class LLDB_API SBProcessInfo; +class LLDB_API SBProcessInfoList; class LLDB_API SBQueue; class LLDB_API SBQueueItem; class LLDB_API SBReplayOptions; diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index e0acc7003a54bc3..d63d2ed1eaba627 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -11,11 +11,13 @@ #include "lldb/API/SBDefines.h" #include "lldb/API/SBProcess.h" +#include "lldb/API/SBProcessInfoList.h" #include struct PlatformConnectOptions; struct PlatformShellCommand; +class ProcessInstanceInfoMatch; namespace lldb { @@ -154,6 +156,8 @@ class LLDB_API SBPlatform { SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, SBTarget &target, SBError &error); + SBProcessInfoList GetAllProcesses(SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcessInfo.h b/lldb/include/lldb/API/SBProcessInfo.h index 36fae9e842a6136..aec5924e4704a49 100644 --- a/lldb/include/lldb/API/SBProcessInfo.h +++ b/lldb/include/lldb/API/SBProcessInfo.h @@ -55,6 +55,7 @@ class LLDB_API SBProcessInfo { private: friend class SBProcess; + friend class SBProcessInfoList; lldb_private::ProcessInstanceInfo &ref(); diff --git a/lldb/include/lldb/API/SBProcessInfoList.h b/lldb/include/lldb/API/SBProcessInfoList.h new file mode 100644 index 000000000000000..9d3f65c46fbbe69 --- /dev/null +++ b/lldb/include/lldb/API/SBProcessInfoList.h @@ -0,0 +1,46 @@ +//===-- SBProcessInfoList.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBPROCESSINSTANCEINFOLIST_H +#define LLDB_API_SBPROCESSINSTANCEINFOLIST_H + +#include "lldb/API/SBDefines.h" + +#include + +namespace lldb_private { +class ProcessInfoList; +} // namespace lldb_private + +namespace lldb { + +class LLDB_API SBProcessInfoList { +public: + SBProcessInfoList(); + ~SBProcessInfoList(); + + SBProcessInfoList(const lldb::SBProcessInfoList &rhs); + + const lldb::SBProcessInfoList &operator=(const lldb::SBProcessInfoList &rhs); + + uint32_t GetSize() const; + + bool GetProcessInfoAtIndex(uint32_t idx, SBProcessInfo &info); + + void Clear(); + +private: + friend SBPlatform; + + SBProcessInfoList(const lldb_private::ProcessInfoList &impl); + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBPROCESSINSTANCEINFOLIST_H diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 08a58c80ef84779..129e4565d9ff993 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -407,6 +407,8 @@ class Platform : public PluginInterface { virtual uint32_t FindProcesses(const ProcessInstanceInfoMatch &match_info, ProcessInstanceInfoList &proc_infos); + ProcessInstanceInfoList GetAllProcesses(); + virtual bool GetProcessInfo(lldb::pid_t pid, ProcessInstanceInfo &proc_info); // Set a breakpoint on all functions that can end up creating a thread for @@ -883,7 +885,7 @@ class Platform : public PluginInterface { } virtual CompilerType GetSiginfoType(const llvm::Triple &triple); - + virtual Args GetExtraStartupCommands(); typedef std::function ProcessInstanceInfoList; +class ProcessInfoList { +public: + ProcessInfoList(const ProcessInstanceInfoList &list) : m_list(list) {} + + uint32_t GetSize() const { return m_list.size(); } + + bool GetProcessInfoAtIndex(uint32_t idx, ProcessInstanceInfo &info) { + if (idx < m_list.size()) { + info = m_list[idx]; + return true; + } + return false; + } + + void Clear() { return m_list.clear(); } + +private: + ProcessInstanceInfoList m_list; +}; + // ProcessInstanceInfoMatch // // A class to help matching one ProcessInstanceInfo to another. 
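One note for readers: the copy constructor and operator= in SBProcessInfoList.cpp further down rely on a small clone() helper from lldb's source/API/Utils.h. A minimal equivalent is sketched here as an assumption about its shape, not a verbatim copy:

#include <memory>

// Deep-copies the pointee (if any) so that each SB object ends up owning an
// independent opaque implementation; empty sources stay empty.
template <typename T>
std::unique_ptr<T> clone(const std::unique_ptr<T> &src) {
  if (!src)
    return nullptr;
  return std::make_unique<T>(*src);
}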
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index 7cfa3aaafdae188..fb10d764be02c94 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -61,6 +61,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBPlatform.cpp SBProcess.cpp SBProcessInfo.cpp + SBProcessInfoList.cpp SBQueue.cpp SBQueueItem.cpp SBReproducer.cpp diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index c31848fe04ea72c..3623fd35bcdf13f 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -14,6 +14,7 @@ #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" @@ -599,6 +600,20 @@ SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, return {}; } +SBProcessInfoList SBPlatform::GetAllProcesses(SBError &error) { + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessInstanceInfoList list = platform_sp->GetAllProcesses(); + return SBProcessInfoList(list); + } + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/source/API/SBProcessInfoList.cpp b/lldb/source/API/SBProcessInfoList.cpp new file mode 100644 index 000000000000000..a711bcb58301e61 --- /dev/null +++ b/lldb/source/API/SBProcessInfoList.cpp @@ -0,0 +1,74 @@ +//===-- SBProcessInfoList.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBProcessInfoList.h" +#include "lldb/API/SBProcessInfo.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/ProcessInfo.h" + +#include "Utils.h" + +using namespace lldb; +using namespace lldb_private; + +SBProcessInfoList::SBProcessInfoList() = default; + +SBProcessInfoList::~SBProcessInfoList() = default; + +SBProcessInfoList::SBProcessInfoList(const ProcessInfoList &impl) + : m_opaque_up(std::make_unique(impl)) { + LLDB_INSTRUMENT_VA(this, impl); +} + +SBProcessInfoList::SBProcessInfoList(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +const lldb::SBProcessInfoList & +SBProcessInfoList::operator=(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +uint32_t SBProcessInfoList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + return m_opaque_up->GetSize(); + + return 0; +} + +void SBProcessInfoList::Clear() { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + m_opaque_up->Clear(); +} + +bool SBProcessInfoList::GetProcessInfoAtIndex(uint32_t idx, + SBProcessInfo &info) { + LLDB_INSTRUMENT_VA(this, idx, info); + + if (m_opaque_up) { + lldb_private::ProcessInstanceInfo process_instance_info; + if (m_opaque_up->GetProcessInfoAtIndex(idx, process_instance_info)) { + info.SetProcessInfo(process_instance_info); + return true; + } + } + + return false; +} diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c117339f07cc9df..c345e33136070f2 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info, return match_count; } +ProcessInstanceInfoList Platform::GetAllProcesses() { + ProcessInstanceInfoList processes; + ProcessInstanceInfoMatch match; + assert(match.MatchAllProcesses()); + FindProcesses(match, processes); + return processes; +} + Status Platform::LaunchProcess(ProcessLaunchInfo &launch_info) { Status error; Log *log = GetLog(LLDBLog::Platform); diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py new file mode 100644 index 000000000000000..be0e3f5f8c50112 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py @@ -0,0 +1,54 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformListProcesses(GDBRemoteTestBase): + @skipIfRemote + @skipIfWindows + def test_get_all_processes(self): + """Test listing processes""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + self.done = False + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + def qsProcessInfo(self): + if not self.done: + self.done = True + return "pid:95126;name:666f6f;" + return "E10" + + self.server.responder = MyPlatformResponder() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + 
lldb.SBPlatformConnectOptions(self.server.get_connect_url())
+        )
+        self.assertSuccess(error)
+        self.assertTrue(platform.IsConnected())
+
+        processes = platform.GetAllProcesses(error)
+        self.assertSuccess(error)
+        self.assertEqual(processes.GetSize(), 2)
+        self.assertEqual(len(processes), 2)
+
+        process_info = lldb.SBProcessInfo()
+        processes.GetProcessInfoAtIndex(0, process_info)
+        self.assertEqual(process_info.GetProcessID(), 95117)
+        self.assertEqual(process_info.GetName(), "foo")
+
+        processes.GetProcessInfoAtIndex(1, process_info)
+        self.assertEqual(process_info.GetProcessID(), 95126)
+        self.assertEqual(process_info.GetName(), "foo")
+
+        platform.DisconnectRemote()

From lldb-commits at lists.llvm.org Thu Oct 5 21:42:31 2023
From: lldb-commits at lists.llvm.org (Med Ismail Bennani via lldb-commits)
Date: Thu, 05 Oct 2023 21:42:31 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378)
In-Reply-To:
Message-ID: <651f9037.170a0220.85f83.9897@mx.google.com>

================
@@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info,
   return match_count;
 }
 
+ProcessInstanceInfoList Platform::GetAllProcesses() {
+  ProcessInstanceInfoList processes;
+  ProcessInstanceInfoMatch match;
+  assert(match.MatchAllProcesses());
----------------
medismailben wrote:

Sounds reasonable.

https://github.com/llvm/llvm-project/pull/68378

From lldb-commits at lists.llvm.org Fri Oct 6 01:07:23 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Fri, 06 Oct 2023 01:07:23 -0700 (PDT)
Subject: [Lldb-commits] [lldb] f74aaca - [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (#68300)
Message-ID: <651fc03b.630a0220.db8f6.9043@mx.google.com>

Author: Michael Buch
Date: 2023-10-06T09:07:20+01:00
New Revision: f74aaca63202cabb512c78fe19196ff348d436a8

URL: https://github.com/llvm/llvm-project/commit/f74aaca63202cabb512c78fe19196ff348d436a8
DIFF: https://github.com/llvm/llvm-project/commit/f74aaca63202cabb512c78fe19196ff348d436a8.diff

LOG: [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (#68300)

**Background**

Prior to DWARFv4, there was no clear normative text on how to handle
static data members. Non-normative text suggested that compilers should
use `DW_AT_external` to mark static data members of structures/unions.
Clang does this consistently. However, GCC doesn't, e.g., when the
structure/union is in an anonymous namespace (which is C++ standard
conformant). Additionally, GCC never emits `DW_AT_data_member_location`s
for union members (regardless of storage linkage and storage duration).

Since DWARFv5 (issue 161118.1), static data members get emitted as
`DW_TAG_variable`.

LLDB used to differentiate between static and non-static members by
checking the `DW_AT_external` flag and the absence of
`DW_AT_data_member_location`. With [D18008](https://reviews.llvm.org/D18008)
LLDB started to pretend that union members always have a `0`
`DW_AT_data_member_location` by default (because GCC never emits these
locations). In [D124409](https://reviews.llvm.org/D124409) LLDB stopped
checking the `DW_AT_external` flag to account for the case where GCC
doesn't emit the flag for types in anonymous namespaces; instead we only
check for the presence of `DW_AT_data_member_location`s. The combination
of these changes then meant that LLDB would never correctly detect that
a union has static data members.
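To make the failure mode concrete, consider a minimal case. It mirrors the main.cpp added below; the per-compiler DWARF behavior in the comments restates the summary above rather than new measurements:

union Foo {
  int val = 42;                 // non-static member
  static const int sVal = -42;  // static data member
};

// Pre-DWARFv5: Clang gives val a DW_AT_data_member_location and marks sVal
// with DW_AT_external. GCC omits DW_AT_external once Foo lives in an
// anonymous namespace and never emits member locations for union members,
// so "lacks a location" alone cannot tell sVal apart from val.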
**Solution**

Instead of unconditionally initializing the `member_byte_offset` to `0`
specifically for union members, this patch proposes to check for both the
absence of `DW_AT_data_member_location` and the presence of
`DW_AT_declaration`, which consistently gets emitted for static data
members on GCC and Clang.

We initialize the `member_byte_offset` to `0` anyway if we determine the
member isn't static. So removing the special case for unions makes this
code simpler to reason about.

Long-term, we should just use DWARFv5's new representation for static
data members.

Fixes #68135

Added: 
    lldb/test/API/lang/cpp/union-static-data-members/Makefile
    lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py
    lldb/test/API/lang/cpp/union-static-data-members/main.cpp

Modified: 
    lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp

Removed: 
    

################################################################################
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 005711d6f488c7f..6e13626d2894313 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2492,8 +2492,9 @@ struct MemberAttributes {
   DWARFFormValue encoding_form;
   /// Indicates the byte offset of the word from the base address of the
   /// structure.
-  uint32_t member_byte_offset;
+  uint32_t member_byte_offset = UINT32_MAX;
   bool is_artificial = false;
+  bool is_declaration = false;
 };
 
 /// Parsed form of all attributes that are relevant for parsing Objective-C
@@ -2627,8 +2628,6 @@ DiscriminantValue &VariantPart::discriminant() { return this->_discriminant; }
 MemberAttributes::MemberAttributes(const DWARFDIE &die,
                                    const DWARFDIE &parent_die,
                                    ModuleSP module_sp) {
-  member_byte_offset = (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX;
-
   DWARFAttributes attributes = die.GetAttributes();
   for (size_t i = 0; i < attributes.Size(); ++i) {
     const dw_attr_t attr = attributes.AttributeAtIndex(i);
@@ -2669,6 +2668,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die,
     case DW_AT_artificial:
       is_artificial = form_value.Boolean();
       break;
+    case DW_AT_declaration:
+      is_declaration = form_value.Boolean();
+      break;
     default:
       break;
     }
@@ -2875,10 +2877,18 @@ void DWARFASTParserClang::ParseSingleMember(
   if (class_is_objc_object_or_interface)
     attrs.accessibility = eAccessNone;
 
-  // Handle static members, which is any member that doesn't have a bit or a
-  // byte member offset.
+  // Handle static members, which are typically members without
+  // locations. However, GCC *never* emits DW_AT_data_member_location
+  // for static data members of unions.
+  // Non-normative text pre-DWARFv5 recommends marking static
+  // data members with a DW_AT_external flag. Clang emits this consistently
+  // whereas GCC emits it only for static data members if not part of an
+  // anonymous namespace. The flag that is consistently emitted for static
+  // data members is DW_AT_declaration, so we check it instead.
+  // FIXME: Since DWARFv5, static data members are marked DW_AT_variable so we
+  // can consistently detect them on both GCC and Clang without below heuristic.
if (attrs.member_byte_offset == UINT32_MAX && - attrs.data_bit_offset == UINT64_MAX) { + attrs.data_bit_offset == UINT64_MAX && attrs.is_declaration) { Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference()); if (var_type) { diff --git a/lldb/test/API/lang/cpp/union-static-data-members/Makefile b/lldb/test/API/lang/cpp/union-static-data-members/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py new file mode 100644 index 000000000000000..47166636b12647c --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -0,0 +1,43 @@ +""" +Tests that frame variable and expr work for +C++ unions and their static data members. +""" +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil + +class CppUnionStaticMembersTestCase(TestBase): + def test(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + + self.expect("frame variable foo", substrs=["val = 42"]) + self.expect("frame variable bar", substrs=["val = 137"]) + + self.expect_expr("foo", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + self.expect_expr("bar", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) + + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") + self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( + name="val", value="42" + )]) + + @expectedFailureAll + def test_union_in_anon_namespace(self): + """Tests that frame variable and expr work + for union static data members in anonymous + namespaces""" + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") + self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( + name="val", value="137" + )]) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/main.cpp b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp new file mode 100644 index 000000000000000..8ba0312cd3a618b --- /dev/null +++ b/lldb/test/API/lang/cpp/union-static-data-members/main.cpp @@ -0,0 +1,25 @@ +union Foo { + int val = 42; + static const int sVal1 = -42; + static Foo sVal2; +}; + +Foo Foo::sVal2{}; + +namespace { +union Bar { + int val = 137; + static const int sVal1 = -137; + static Bar sVal2; +}; + +Bar Bar::sVal2{}; +} // namespace + +int main() { + Foo foo; + Bar bar; + auto sum = Bar::sVal1 + Foo::sVal1 + Foo::sVal2.val + Bar::sVal2.val; + + return 0; +} From lldb-commits at lists.llvm.org Fri Oct 6 01:07:27 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 01:07:27 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651fc03f.170a0220.6e9a0.a396@mx.google.com> https://github.com/Michael137 closed https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Fri Oct 6 02:05:56 2023 From: lldb-commits at lists.llvm.org (Stephen 
Thomas via lldb-commits) Date: Fri, 06 Oct 2023 02:05:56 -0700 (PDT) Subject: [Lldb-commits] [lldb] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197) In-Reply-To: Message-ID: <651fcdf4.170a0220.b2e5.a899@mx.google.com> https://github.com/stepthomas updated https://github.com/llvm/llvm-project/pull/68197 >From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001 From: Stephen Thomas Date: Wed, 4 Oct 2023 10:16:49 +0100 Subject: [PATCH] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for non-value-returning forms, although actually using them isn't supported by hardware. However, these encodings aren't supported by the backend, meaning that they can't even be assembled or disassembled. Add support for the non-returning encodings, but gate actually using them in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts, which no target uses. This does allow the non-returning instructions to be tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them. The feature does not gate assembling or disassembling them, this is now not an error. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 9 ++++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 + llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +++++-- llvm/lib/Target/AMDGPU/FLATInstructions.td | 18 +++++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 ++++++++++++++++--- llvm/test/MC/AMDGPU/gfx1030_err.s | 6 --- 8 files changed, 85 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bf5a7b0a96977c7..9ec062d45ba9f48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf [FeatureFlatGlobalInsts] >; +def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts", + "HasAtomicCsubNoRtnInsts", + "true", + "Has buffer_atomic_csub and global_atomic_csub instructions that don't " + "return original value" +>; + def FeatureFlatAtomicFaddF32Inst : SubtargetFeature<"flat-atomic-fadd-f32-inst", "HasFlatAtomicFaddF32Inst", @@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">; def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; +def HasAtomicCsubNoRtnInsts : Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 12ccfd29f26c030..81fc28d293021ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; +defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op; defm int_amdgcn_ds_fadd_v2bf16 : noret_op; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index cec35d1147bb0ae..3e71141431d7dc9 100644 --- 
a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_dec_x2", VReg_64, i64 >; -let SubtargetPredicate = HasGFX10_BEncoding in -defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < - "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub ->; +let SubtargetPredicate = HasGFX10_BEncoding in { + defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < + "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub + >; + + let OtherPredicates = [HasAtomicCsubNoRtnInsts] in + defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN < + "buffer_atomic_csub", VGPR_32, i32 + >; +} let SubtargetPredicate = isGFX8GFX9 in { def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; @@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">; +let SubtargetPredicate = HasAtomicCsubNoRtnInsts in +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>; + let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; @@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; -defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx10<0x034>; +defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>; defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>; defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 7d2286c5607743a..63453d6c990dcf1 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -870,9 +870,14 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", VReg_64, i64>; -let SubtargetPredicate = HasGFX10_BEncoding in -defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", - VGPR_32, i32>; +let SubtargetPredicate = HasGFX10_BEncoding in { + defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", + VGPR_32, i32>; + + let OtherPredicates = [HasAtomicCsubNoRtnInsts] in + defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_csub", + VGPR_32, i32>; +} defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; @@ -1442,6 +1447,9 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_glo defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; +let OtherPredicates = [HasAtomicCsubNoRtnInsts] in +defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; + defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; defm : GlobalFLATAtomicPats 
<"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>; @@ -2102,7 +2110,7 @@ defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; -defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>; +defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>; defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; @@ -2333,7 +2341,7 @@ defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATO defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; -defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>; +defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>; defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index ce538f086cc368e..eb3561c046f5309 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -160,6 +160,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFaddNoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; + bool HasAtomicCsubNoRtnInsts = false; bool HasAtomicGlobalPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool SupportsSRAMECC = false; @@ -1203,6 +1204,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable. bool hasCvtFP8VOP1Bug() const { return true; } + // \returns true is CSUB atomics support a no-return form. + bool hasAtomicCsubNoRtnInsts() const { return HasAtomicCsubNoRtnInsts; } + /// \returns SGPR allocation granularity supported by the subtarget. 
unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 60a6964c754ff64..f09ca954904fc62 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -210,6 +210,7 @@ defm SIbuffer_atomic_or : SDBufferAtomicNoRet; defm SIbuffer_atomic_xor : SDBufferAtomicNoRet; defm SIbuffer_atomic_inc : SDBufferAtomicNoRet; defm SIbuffer_atomic_dec : SDBufferAtomicNoRet; +defm SIbuffer_atomic_csub : SDBufferAtomicNoRet; defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet; defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet; defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll index e96a3545cbd77e5..a046179636cd3ad 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll @@ -4,35 +4,70 @@ declare i32 @llvm.amdgcn.buffer.atomic.csub(i32, <4 x i32>, i32, i32, i1) declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32) -; GCN-LABEL: {{^}}buffer_atomic_csub: +; GCN-LABEL: {{^}}buffer_atomic_csub_rtn: ; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen glc -define amdgpu_ps void @buffer_atomic_csub(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) { +define amdgpu_ps void @buffer_atomic_csub_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) { main_body: %ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0) ret void } -; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc: +; GCN-LABEL: {{^}}buffer_atomic_csub_no_rtn: +; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen +define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 { +main_body: + %ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0) + ret void +} + +; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_rtn: ; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 glc slc -define amdgpu_ps void @buffer_atomic_csub_off4_slc(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) { +define amdgpu_ps void @buffer_atomic_csub_off4_slc_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) { main_body: %ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1) ret void } -; GCN-LABEL: {{^}}global_atomic_csub: +; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_no_rtn: +; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 slc +define amdgpu_ps void @buffer_atomic_csub_off4_slc_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 { +main_body: + %ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_rtn: ; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc -define amdgpu_kernel void @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) { +define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) { +main_body: + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_no_rtn: +; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 { main_body: %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, 
i32 %data) ret void } -; GCN-LABEL: {{^}}global_atomic_csub_off4: +; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn: ; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc -define amdgpu_kernel void @global_atomic_csub_off4(ptr addrspace(1) %ptr, i32 %data) { +define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) { main_body: %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1 %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data) ret void } + +; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn: +; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 { +main_body: + %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1 + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data) + ret void +} + +attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" } diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index 10ffa9b73a04f5f..ba8784a39c3698f 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -141,12 +141,6 @@ ds_write_src2_b32 v1 offset:65535 ds_write_src2_b64 v1 offset:65535 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU -buffer_atomic_csub v5, off, s[8:11], s3 offset:4095 -// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc - -global_atomic_csub v2, v[0:1], v2, off offset:100 slc -// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc - image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type From lldb-commits at lists.llvm.org Fri Oct 6 02:19:19 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Fri, 06 Oct 2023 02:19:19 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651fd117.170a0220.d0d6b.9316@mx.google.com> https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/65852 >From d9d8bcbb98e8f5aecb9733329389d61a489bd731 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 9 Sep 2023 23:07:29 +0800 Subject: [PATCH 01/10] [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 62 ++++++ .../test/Transforms/InstCombine/icmp-range.ll | 181 ++++++++++++++++++ 2 files changed, 243 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9fdc46fec631679..837b8e6d2619989 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6309,7 +6309,69 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + ICmpInst::Predicate Pred1, Pred2; const APInt *C; + // icmp eq/ne X, (zext (icmp eq/ne X, C)) + if (match(&I, m_c_ICmp(Pred1, m_Value(X), + m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + if (C->isZero()) { + if (Pred2 == ICmpInst::ICMP_EQ) { + // icmp eq X, (zext (icmp eq X, 0)) --> false + // icmp ne X, (zext (icmp eq X, 0)) --> true + return 
replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + } + } else if (C->isOne()) { + if (Pred2 == ICmpInst::ICMP_NE) { + // icmp eq X, (zext (icmp ne X, 1)) --> false + // icmp ne X, (zext (icmp ne X, 1)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + } + } else { + // C != 0 && C != 1 + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + return ICmpInst::Create( + Instruction::ICmp, Pred1, X, + Constant::getIntegerValue( + X->getType(), + APInt(X->getType()->getScalarSizeInBits(), + static_cast(Pred2 == ICmpInst::ICMP_NE)))); + } + } + if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) && match(I.getOperand(1), m_APInt(C)) && X->getType()->isIntOrIntVectorTy(1) && diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 4281e09cb0309c8..15424fce33fdeea 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1034,6 +1034,187 @@ define i1 @icmp_ne_bool_1(ptr %ptr) { ret i1 %cmp } +define i1 @icmp_ne_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_zero( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i32 %a, 
1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_one( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From bf79e8624a1578c65ca3adc4c3c95512c0e18d53 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 22:36:02 +0800 Subject: [PATCH 02/10] 
fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d0b62c17ec94358..d1f141bcf0e7df7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6391,10 +6391,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp eq X, 0)) --> false // icmp ne X, (zext (icmp eq X, 0)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 @@ -6412,10 +6409,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 >From ba475e31713758724305acbff496cbe605888da8 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 23:00:17 +0800 Subject: [PATCH 03/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../Transforms/InstCombine/InstCombineCompares.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d1f141bcf0e7df7..c5e3ad8a55741fb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6400,9 +6400,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else if (C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { @@ -6418,9 +6416,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { // C != 0 && C != 1 @@ -6430,10 +6426,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - Constant::getIntegerValue( - X->getType(), - APInt(X->getType()->getScalarSizeInBits(), - static_cast(Pred2 == ICmpInst::ICMP_NE)))); + ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); } } >From 70a70fb44d0e628a1cf485e1767ada3eaaa26b0f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:30:11 +0800 Subject: [PATCH 04/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++ .../InstCombine/InstCombineCompares.cpp | 50 +++++++++++++------ 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 13877538f79de6d..38d40d1ec9a839e 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,6 +767,28 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } +/// Match a pattern, capturing the value if we match. +template struct capture_ty { + SubPattern_t SubPattern; + Class *&VR; + + capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} + + template bool match(ITy *V) { + if (auto *CV = dyn_cast(V)) { + VR = CV; + return SubPattern.match(V); + } + return false; + } +}; + +template +inline capture_ty m_Instruction(Instruction *&I, + const T &SubPattern) { + return capture_ty(SubPattern, I); +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c5e3ad8a55741fb..aca8611026ef1ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6380,53 +6380,71 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + // icmp eq/ne X, (zext/sext (icmp eq/ne X, C)) ICmpInst::Predicate Pred1, Pred2; const APInt *C; - // icmp eq/ne X, (zext (icmp eq/ne X, C)) + Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && - ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + m_Instruction(ExtI, + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { + bool IsSExt = ExtI->getOpcode() == Instruction::SExt; + bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { - // icmp eq X, (zext (icmp eq X, 0)) --> false - // icmp ne X, (zext (icmp eq X, 0)) --> true + // icmp eq X, (zext/sext (icmp eq X, 0)) --> false + // icmp ne X, (zext/sext (icmp eq X, 0)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } - } else if (C->isOne()) { + } else if (IsSExt ? 
C->isAllOnes() : C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true + // icmp eq X, (sext (icmp ne X, -1)) --> false + // icmp ne X, (sext (icmp ne X, -1)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { - // C != 0 && C != 1 - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != 1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != -1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); + ConstantInt::get(X->getType(), + Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); } } >From 418562d5dbd25167d3f9b2c61fb7265581ee99d4 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:39:23 +0800 Subject: [PATCH 05/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ----- .../InstCombine/InstCombineCompares.cpp | 6 +- .../Transforms/InstCombine/and-or-icmps.ll | 17 +--- .../test/Transforms/InstCombine/icmp-range.ll | 82 ++++++------------- 4 files changed, 31 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 38d40d1ec9a839e..13877538f79de6d 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,28 +767,6 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } -/// Match a pattern, capturing the value if we match. -template struct capture_ty { - SubPattern_t SubPattern; - Class *&VR; - - capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} - - template bool match(ITy *V) { - if (auto *CV = dyn_cast(V)) { - VR = CV; - return SubPattern.match(V); - } - return false; - } -}; - -template -inline capture_ty m_Instruction(Instruction *&I, - const T &SubPattern) { - return capture_ty(SubPattern, I); -} - /// Match a specified Value*. 
struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index aca8611026ef1ca..b72bde885de124b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6385,9 +6385,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { const APInt *C; Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_Instruction(ExtI, - m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_CombineAnd(m_Instruction(ExtI), + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 065dbf261e131bf..2c887d574d397f6 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,19 +366,10 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 -; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 -; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] -; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 -; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] -; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] -; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true -; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] +; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index f7efff9f675373a..3a40755384f2a21 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1164,7 +1164,7 @@ define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { } define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { -; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( ; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, @@ -1218,10 +1218,7 @@ define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { define i1 @icmp_ne_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1231,9 +1228,8 @@ define i1 @icmp_ne_sext_eq_zero(i32 %a) { define i1 @icmp_ne_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero( -; 
CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1244,10 +1240,7 @@ define i1 @icmp_ne_sext_ne_zero(i32 %a) { define i1 @icmp_eq_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1257,9 +1250,8 @@ define i1 @icmp_eq_sext_eq_zero(i32 %a) { define i1 @icmp_eq_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1270,9 +1262,8 @@ define i1 @icmp_eq_sext_ne_zero(i32 %a) { define i1 @icmp_ne_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1283,10 +1274,7 @@ define i1 @icmp_ne_sext_eq_allones(i32 %a) { define i1 @icmp_ne_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1296,9 +1284,8 @@ define i1 @icmp_ne_sext_ne_allones(i32 %a) { define i1 @icmp_eq_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1309,10 +1296,7 @@ define i1 @icmp_eq_sext_eq_allones(i32 %a) { define i1 @icmp_eq_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1322,9 +1306,7 @@ define i1 @icmp_eq_sext_ne_allones(i32 %a) { define i1 @icmp_ne_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1335,9 +1317,7 @@ define i1 @icmp_ne_sext_eq_otherwise(i32 
%a) { define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1348,9 +1328,7 @@ define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1361,9 +1339,7 @@ define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1374,10 +1350,7 @@ define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1387,9 +1360,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1400,9 +1372,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1413,10 +1384,7 @@ define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp ne <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1426,9 +1394,7 @@ define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ; CHECK-LABEL: 
@icmp_ne_sext_eq_otherwise_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, >From b7565ccc7090e15f6330da274225420691a4160d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 10:24:50 +0800 Subject: [PATCH 06/10] [InstCombine] Fix comments `zext` -> `sext`. NFC. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b72bde885de124b..c7b9366bfd45e41 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6437,10 +6437,10 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 // when C != 0 && C != -1: - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 + // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, ConstantInt::get(X->getType(), >From 5b5052403968d4b394b30779c8912a1e732e9998 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 24 Sep 2023 17:59:26 +0800 Subject: [PATCH 07/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c7b9366bfd45e41..7c7ab62c64dac89 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6390,6 +6390,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); + auto CreateRangeCheck = [&] { + Value *V1 = Constant::getNullValue(X->getType()); + Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1); + return BinaryOperator::Create( + Pred1 == ICmpInst::ICMP_EQ ? 
Instruction::Or : Instruction::And, + Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2)); + }; if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { // icmp eq X, (zext/sext (icmp eq X, 0)) --> false @@ -6397,17 +6404,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else if (!IsSExt || HasOneUse) { - // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 - // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 - // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2 - // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1 - return ICmpInst::Create( - Instruction::ICmp, - Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT - : ICmpInst::ICMP_ULT, - IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) - : X, - ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); + // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1 + // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1 + // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1 + // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X == -1 + return CreateRangeCheck(); } } else if (IsSExt ? C->isAllOnes() : C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { @@ -6418,17 +6419,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else if (!IsSExt || HasOneUse) { - // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 - // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 - // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2 - // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1 - return ICmpInst::Create( - Instruction::ICmp, - Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT - : ICmpInst::ICMP_ULT, - IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) - : X, - ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); + // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1 + // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1 + // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1 + // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X == -1 + return CreateRangeCheck(); } } else { // when C != 0 && C != 1: >From 55d52b1f05004abe6c4187dc07437580c7f5aa73 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 24 Sep 2023 21:49:47 +0800 Subject: [PATCH 08/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7c7ab62c64dac89..9f2d0c8110f79e0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6391,11 +6391,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); auto CreateRangeCheck = [&] { - Value *V1 = Constant::getNullValue(X->getType()); - Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1); + Value *CmpV1 = + Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); + Value *CmpV2 = Builder.CreateICmp( + Pred1, X, ConstantInt::get(X->getType(), IsSExt ? 
-1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, - Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2)); + CmpV1, CmpV2); }; if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { >From c0d8f8193fa1620db1f84379f2316fcf4b401e4c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 1 Oct 2023 20:10:57 +0800 Subject: [PATCH 09/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 10 ++++---- .../Transforms/InstCombine/and-or-icmps.ll | 17 ++++++++++---- .../test/Transforms/InstCombine/icmp-range.ll | 23 +++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f2d0c8110f79e0..4fca8859dea7acc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6387,14 +6387,15 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { if (match(&I, m_c_ICmp(Pred1, m_Value(X), m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_APInt(C)))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); auto CreateRangeCheck = [&] { Value *CmpV1 = Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); Value *CmpV2 = Builder.CreateICmp( - Pred1, X, ConstantInt::get(X->getType(), IsSExt ? -1 : 1)); + Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, CmpV1, CmpV2); @@ -6440,8 +6441,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), - Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); + ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE + ? (IsSExt ? 
-1 : 1) + : 0)); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 2c887d574d397f6..065dbf261e131bf 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,10 +366,19 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 +; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 +; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] +; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 +; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] +; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] +; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true +; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 3a40755384f2a21..79790b7458d4219 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1403,6 +1403,29 @@ define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ret <2 x i1> %cmp1 } +define i1 @icmp_ne_sext_ne_zero_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_zero_i128( +; CHECK-NEXT: [[TMP1:%.*]] = add i128 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i128 [[TMP1]], -2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 0 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_otherwise_i128( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i128 [[A:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 2 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From 2285a2c6b8e66cf35aed6151c61e841f1349817f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 6 Oct 2023 17:17:53 +0800 Subject: [PATCH 10/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` Add negative tests --- .../test/Transforms/InstCombine/icmp-range.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 17903be937057ab..7af06e03fd4b2a9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1426,6 +1426,85 @@ define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { ret i1 %cmp1 } +; Negative tests with non-equality predicates +define i1 @icmp_ne_sext_sgt_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_sgt_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp sgt i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + ; tests from PR59555 define i1 @isFloat(i64 %0) { ; CHECK-LABEL: @isFloat( From lldb-commits at lists.llvm.org Fri Oct 6 02:47:34 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 02:47:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) In-Reply-To: Message-ID: <651fd7b6.650a0220.31787.2c48@mx.google.com> https://github.com/omjavaid commented: After this change 
will the isAArch64SME check be replaced in all locations?

So the manual says "The SVE FFR predicate register is not architecturally visible when the PE is in Streaming SVE mode if FEAT_SME_FA64 is not implemented or not enabled at the current Exception level."

All SVE tests seem to use FFR, so does this mean isAArch64SME is now redundant as far as LLDB testing is concerned?

https://github.com/llvm/llvm-project/pull/68094

From lldb-commits at lists.llvm.org  Fri Oct  6 02:50:24 2023
From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits)
Date: Fri, 06 Oct 2023 02:50:24 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767)
In-Reply-To: 
Message-ID: <651fd860.a70a0220.2dde.2a8f@mx.google.com>

================
@@ -0,0 +1,190 @@
+Using LLDB On AArch64 Linux
+===========================
+
+This page explains the details of debugging certain AArch64 extensions using
+LLDB. If something is not mentioned here, it likely works as you would expect.
+
+This is not a replacement for ptrace and Linux Kernel documentation. This covers
+how LLDB has chosen to use those things and how that effects your experience as
+a user.
+
+Scalable Vector Extension (SVE)
+-------------------------------
+
+See `here `__
+to learn about the extension and `here `__
+for the Linux Kernel's handling of it.
+
+In LLDB you will be able to see the following new registers:
+
+* ``z0-z31`` vector registers, each one has size equal to the vector length.
+* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector
+  length. Making each one vector length / 8 sized.
+* ``ffr`` the first fault register, same size as a predicate register.
+* ``vg``, the vector length in "granules". Each granule is 8 bytes.
+
+.. code-block::
+
+   Scalable Vector Extension Registers:
+         vg = 0x0000000000000002
+         z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> }
+       <...>
+         p0 = {0xff 0xff}
+       <...>
+        ffr = {0xff 0xff}
+
+The example above has a vector length of 16 bytes. Within LLDB you will always
+see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16).
+Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes)
+elsewhere. Where you see "vl", it is in bytes.
----------------
omjavaid wrote:

The trailing "elsewhere" seems to be a typo.
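As a side note for readers of this review, the three units in that hunk relate by simple arithmetic. The sketch below is a standalone illustration of the vg/vl/vq conversions the documentation describes; the helper names are invented for the example and are not part of LLDB or the patch.

#include <cstdint>

// vg counts 8-byte granules, vq counts 16-byte quadwords, and vl is the
// vector length in bytes, per the documentation hunk above.
constexpr uint64_t VlFromVg(uint64_t Vg) { return Vg * 8; }
constexpr uint64_t VqFromVl(uint64_t Vl) { return Vl / 16; }

// The register dump in the hunk shows vg = 2, i.e. a 16 byte vector length.
static_assert(VlFromVg(2) == 16, "vg = 2 means a 16 byte vector length");
static_assert(VqFromVl(VlFromVg(2)) == 1, "16 bytes is one quadword");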
https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 02:50:44 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 02:50:44 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fd874.170a0220.85f83.a7aa@mx.google.com> https://github.com/omjavaid deleted https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 02:57:39 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Fri, 06 Oct 2023 02:57:39 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651fda13.a70a0220.325a2.2e6e@mx.google.com> https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/65852 >From d9d8bcbb98e8f5aecb9733329389d61a489bd731 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 9 Sep 2023 23:07:29 +0800 Subject: [PATCH 01/10] [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 62 ++++++ .../test/Transforms/InstCombine/icmp-range.ll | 181 ++++++++++++++++++ 2 files changed, 243 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9fdc46fec631679..837b8e6d2619989 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6309,7 +6309,69 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + ICmpInst::Predicate Pred1, Pred2; const APInt *C; + // icmp eq/ne X, (zext (icmp eq/ne X, C)) + if (match(&I, m_c_ICmp(Pred1, m_Value(X), + m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + if (C->isZero()) { + if (Pred2 == ICmpInst::ICMP_EQ) { + // icmp eq X, (zext (icmp eq X, 0)) --> false + // icmp ne X, (zext (icmp eq X, 0)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + } + } else if (C->isOne()) { + if (Pred2 == ICmpInst::ICMP_NE) { + // icmp eq X, (zext (icmp ne X, 1)) --> false + // icmp ne X, (zext (icmp ne X, 1)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 
1 : 2))); + } + } else { + // C != 0 && C != 1 + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + return ICmpInst::Create( + Instruction::ICmp, Pred1, X, + Constant::getIntegerValue( + X->getType(), + APInt(X->getType()->getScalarSizeInBits(), + static_cast(Pred2 == ICmpInst::ICMP_NE)))); + } + } + if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) && match(I.getOperand(1), m_APInt(C)) && X->getType()->isIntOrIntVectorTy(1) && diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 4281e09cb0309c8..15424fce33fdeea 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1034,6 +1034,187 @@ define i1 @icmp_ne_bool_1(ptr %ptr) { ret i1 %cmp } +define i1 @icmp_ne_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_zero( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_one( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + 
+define i1 @icmp_eq_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From bf79e8624a1578c65ca3adc4c3c95512c0e18d53 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 22:36:02 +0800 Subject: [PATCH 02/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d0b62c17ec94358..d1f141bcf0e7df7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6391,10 +6391,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp eq X, 0)) --> false // icmp ne X, (zext (icmp eq X, 0)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 @@ -6412,10 +6409,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 >From ba475e31713758724305acbff496cbe605888da8 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 23:00:17 +0800 Subject: [PATCH 03/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../Transforms/InstCombine/InstCombineCompares.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d1f141bcf0e7df7..c5e3ad8a55741fb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6400,9 +6400,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else if (C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { @@ -6418,9 +6416,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { // C != 0 && C != 1 @@ -6430,10 +6426,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - Constant::getIntegerValue( - X->getType(), - APInt(X->getType()->getScalarSizeInBits(), - static_cast(Pred2 == ICmpInst::ICMP_NE)))); + ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); } } >From 70a70fb44d0e628a1cf485e1767ada3eaaa26b0f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:30:11 +0800 Subject: [PATCH 04/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++ .../InstCombine/InstCombineCompares.cpp | 50 +++++++++++++------ 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 13877538f79de6d..38d40d1ec9a839e 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,6 +767,28 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } +/// Match a pattern, capturing the value if we match. +template struct capture_ty { + SubPattern_t SubPattern; + Class *&VR; + + capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} + + template bool match(ITy *V) { + if (auto *CV = dyn_cast(V)) { + VR = CV; + return SubPattern.match(V); + } + return false; + } +}; + +template +inline capture_ty m_Instruction(Instruction *&I, + const T &SubPattern) { + return capture_ty(SubPattern, I); +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c5e3ad8a55741fb..aca8611026ef1ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6380,53 +6380,71 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + // icmp eq/ne X, (zext/sext (icmp eq/ne X, C)) ICmpInst::Predicate Pred1, Pred2; const APInt *C; - // icmp eq/ne X, (zext (icmp eq/ne X, C)) + Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && - ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + m_Instruction(ExtI, + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { + bool IsSExt = ExtI->getOpcode() == Instruction::SExt; + bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { - // icmp eq X, (zext (icmp eq X, 0)) --> false - // icmp ne X, (zext (icmp eq X, 0)) --> true + // icmp eq X, (zext/sext (icmp eq X, 0)) --> false + // icmp ne X, (zext/sext (icmp eq X, 0)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } - } else if (C->isOne()) { + } else if (IsSExt ? 
C->isAllOnes() : C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true + // icmp eq X, (sext (icmp ne X, -1)) --> false + // icmp ne X, (sext (icmp ne X, -1)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { - // C != 0 && C != 1 - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != 1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != -1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); + ConstantInt::get(X->getType(), + Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); } } >From 418562d5dbd25167d3f9b2c61fb7265581ee99d4 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:39:23 +0800 Subject: [PATCH 05/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ----- .../InstCombine/InstCombineCompares.cpp | 6 +- .../Transforms/InstCombine/and-or-icmps.ll | 17 +--- .../test/Transforms/InstCombine/icmp-range.ll | 82 ++++++------------- 4 files changed, 31 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 38d40d1ec9a839e..13877538f79de6d 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,28 +767,6 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } -/// Match a pattern, capturing the value if we match. -template struct capture_ty { - SubPattern_t SubPattern; - Class *&VR; - - capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} - - template bool match(ITy *V) { - if (auto *CV = dyn_cast(V)) { - VR = CV; - return SubPattern.match(V); - } - return false; - } -}; - -template -inline capture_ty m_Instruction(Instruction *&I, - const T &SubPattern) { - return capture_ty(SubPattern, I); -} - /// Match a specified Value*. 
struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index aca8611026ef1ca..b72bde885de124b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6385,9 +6385,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { const APInt *C; Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_Instruction(ExtI, - m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_CombineAnd(m_Instruction(ExtI), + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 065dbf261e131bf..2c887d574d397f6 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,19 +366,10 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 -; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 -; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] -; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 -; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] -; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] -; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true -; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] +; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index f7efff9f675373a..3a40755384f2a21 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1164,7 +1164,7 @@ define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { } define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { -; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( ; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, @@ -1218,10 +1218,7 @@ define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { define i1 @icmp_ne_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1231,9 +1228,8 @@ define i1 @icmp_ne_sext_eq_zero(i32 %a) { define i1 @icmp_ne_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero( -; 
CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1244,10 +1240,7 @@ define i1 @icmp_ne_sext_ne_zero(i32 %a) { define i1 @icmp_eq_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1257,9 +1250,8 @@ define i1 @icmp_eq_sext_eq_zero(i32 %a) { define i1 @icmp_eq_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1270,9 +1262,8 @@ define i1 @icmp_eq_sext_ne_zero(i32 %a) { define i1 @icmp_ne_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1283,10 +1274,7 @@ define i1 @icmp_ne_sext_eq_allones(i32 %a) { define i1 @icmp_ne_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1296,9 +1284,8 @@ define i1 @icmp_ne_sext_ne_allones(i32 %a) { define i1 @icmp_eq_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1309,10 +1296,7 @@ define i1 @icmp_eq_sext_eq_allones(i32 %a) { define i1 @icmp_eq_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1322,9 +1306,7 @@ define i1 @icmp_eq_sext_ne_allones(i32 %a) { define i1 @icmp_ne_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1335,9 +1317,7 @@ define i1 @icmp_ne_sext_eq_otherwise(i32 
%a) { define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1348,9 +1328,7 @@ define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1361,9 +1339,7 @@ define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1374,10 +1350,7 @@ define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1387,9 +1360,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1400,9 +1372,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1413,10 +1384,7 @@ define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp ne <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1426,9 +1394,7 @@ define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ; CHECK-LABEL: 
@icmp_ne_sext_eq_otherwise_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, >From b7565ccc7090e15f6330da274225420691a4160d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 10:24:50 +0800 Subject: [PATCH 06/10] [InstCombine] Fix comments `zext` -> `sext`. NFC. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b72bde885de124b..c7b9366bfd45e41 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6437,10 +6437,10 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 // when C != 0 && C != -1: - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 + // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, ConstantInt::get(X->getType(), >From 5b5052403968d4b394b30779c8912a1e732e9998 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 24 Sep 2023 17:59:26 +0800 Subject: [PATCH 07/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c7b9366bfd45e41..7c7ab62c64dac89 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6390,6 +6390,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); + auto CreateRangeCheck = [&] { + Value *V1 = Constant::getNullValue(X->getType()); + Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1); + return BinaryOperator::Create( + Pred1 == ICmpInst::ICMP_EQ ? 
Instruction::Or : Instruction::And,
+          Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2));
+    };
     if (C->isZero()) {
       if (Pred2 == ICmpInst::ICMP_EQ) {
         // icmp eq X, (zext/sext (icmp eq X, 0)) --> false
@@ -6397,17 +6404,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
         return replaceInstUsesWith(
             I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
       } else if (!IsSExt || HasOneUse) {
-        // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2
-        // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1
-        // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2
-        // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1
-        return ICmpInst::Create(
-            Instruction::ICmp,
-            Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
-                                       : ICmpInst::ICMP_ULT,
-            IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1))
-                   : X,
-            ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2));
+        // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
+        // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
+        // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
+        // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
+        return CreateRangeCheck();
       }
     } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
       if (Pred2 == ICmpInst::ICMP_NE) {
@@ -6418,17 +6419,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
         return replaceInstUsesWith(
             I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
       } else if (!IsSExt || HasOneUse) {
-        // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2
-        // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1
-        // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2
-        // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1
-        return ICmpInst::Create(
-            Instruction::ICmp,
-            Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
-                                       : ICmpInst::ICMP_ULT,
-            IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1))
-                   : X,
-            ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2));
+        // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1
+        // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1
+        // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1
+        // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1
+        return CreateRangeCheck();
       }
     } else {
       // when C != 0 && C != 1:
>From 55d52b1f05004abe6c4187dc07437580c7f5aa73 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Sun, 24 Sep 2023 21:49:47 +0800
Subject: [PATCH 08/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))`

---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c7ab62c64dac89..9f2d0c8110f79e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6391,11 +6391,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
     bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
     bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
     auto CreateRangeCheck = [&] {
-      Value *V1 = Constant::getNullValue(X->getType());
-      Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1);
+      Value *CmpV1 =
+          Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType()));
+      Value *CmpV2 = Builder.CreateICmp(
+          Pred1, X, ConstantInt::get(X->getType(), IsSExt ?
-1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, - Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2)); + CmpV1, CmpV2); }; if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { >From c0d8f8193fa1620db1f84379f2316fcf4b401e4c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 1 Oct 2023 20:10:57 +0800 Subject: [PATCH 09/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 10 ++++---- .../Transforms/InstCombine/and-or-icmps.ll | 17 ++++++++++---- .../test/Transforms/InstCombine/icmp-range.ll | 23 +++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f2d0c8110f79e0..4fca8859dea7acc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6387,14 +6387,15 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { if (match(&I, m_c_ICmp(Pred1, m_Value(X), m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_APInt(C)))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); auto CreateRangeCheck = [&] { Value *CmpV1 = Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); Value *CmpV2 = Builder.CreateICmp( - Pred1, X, ConstantInt::get(X->getType(), IsSExt ? -1 : 1)); + Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, CmpV1, CmpV2); @@ -6440,8 +6441,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), - Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); + ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE + ? (IsSExt ? 
-1 : 1) + : 0)); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 2c887d574d397f6..065dbf261e131bf 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,10 +366,19 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 +; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 +; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] +; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 +; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] +; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] +; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true +; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 3a40755384f2a21..79790b7458d4219 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1403,6 +1403,29 @@ define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ret <2 x i1> %cmp1 } +define i1 @icmp_ne_sext_ne_zero_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_zero_i128( +; CHECK-NEXT: [[TMP1:%.*]] = add i128 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i128 [[TMP1]], -2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 0 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_otherwise_i128( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i128 [[A:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 2 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From 2285a2c6b8e66cf35aed6151c61e841f1349817f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 6 Oct 2023 17:17:53 +0800 Subject: [PATCH 10/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` Add negative tests --- .../test/Transforms/InstCombine/icmp-range.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 17903be937057ab..7af06e03fd4b2a9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1426,6 +1426,85 @@ define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { ret i1 %cmp1 } +; Negative tests with non-equality predicates +define i1 @icmp_ne_sext_sgt_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_sgt_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp sgt i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + ; tests from PR59555 define i1 @isFloat(i64 %0) { ; CHECK-LABEL: @isFloat( From lldb-commits at lists.llvm.org Fri Oct 6 02:57:46 2023 From: lldb-commits at lists.llvm.org (Matthias Springer via lldb-commits) Date: Fri, 06 Oct 2023 02:57:46 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651fda1a.170a0220.3d188.aa24@mx.google.com> https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/65852 >From d9d8bcbb98e8f5aecb9733329389d61a489bd731 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 9 Sep 2023 23:07:29 +0800 Subject: [PATCH 01/10] [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 62 ++++++ .../test/Transforms/InstCombine/icmp-range.ll | 181 ++++++++++++++++++ 2 files changed, 243 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9fdc46fec631679..837b8e6d2619989 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6309,7 +6309,69 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + ICmpInst::Predicate Pred1, Pred2; const APInt *C; + // icmp eq/ne X, (zext (icmp eq/ne X, C)) + if (match(&I, m_c_ICmp(Pred1, m_Value(X), + m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + if (C->isZero()) { + if (Pred2 == ICmpInst::ICMP_EQ) { + // icmp eq X, (zext (icmp eq X, 0)) --> false + // icmp ne X, (zext (icmp eq X, 0)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + } + } else if (C->isOne()) { + if (Pred2 == ICmpInst::ICMP_NE) { + // icmp eq X, (zext (icmp ne X, 1)) --> false + // icmp ne X, (zext (icmp ne X, 1)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 
1 : 2))); + } + } else { + // C != 0 && C != 1 + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + return ICmpInst::Create( + Instruction::ICmp, Pred1, X, + Constant::getIntegerValue( + X->getType(), + APInt(X->getType()->getScalarSizeInBits(), + static_cast(Pred2 == ICmpInst::ICMP_NE)))); + } + } + if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) && match(I.getOperand(1), m_APInt(C)) && X->getType()->isIntOrIntVectorTy(1) && diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 4281e09cb0309c8..15424fce33fdeea 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1034,6 +1034,187 @@ define i1 @icmp_ne_bool_1(ptr %ptr) { ret i1 %cmp } +define i1 @icmp_ne_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_zero( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_one( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + 
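; A quick worked check of the zext fold exercised above (an illustrative
; sketch, not additional test coverage from the patch): `zext i1` only ever
; yields 0 or 1, so for
;   %cmp  = icmp ne i32 %a, 0
;   %conv = zext i1 %cmp to i32
; the equality `icmp eq i32 %conv, %a` holds exactly for %a == 0 (%conv is 0)
; and %a == 1 (%conv is 1), i.e. `icmp ult i32 %a, 2`, and its negation is
; `icmp ugt i32 %a, 1` - matching the CHECK lines in the surrounding tests.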
+define i1 @icmp_eq_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From bf79e8624a1578c65ca3adc4c3c95512c0e18d53 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 22:36:02 +0800 Subject: [PATCH 02/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d0b62c17ec94358..d1f141bcf0e7df7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6391,10 +6391,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp eq X, 0)) --> false // icmp ne X, (zext (icmp eq X, 0)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 @@ -6412,10 +6409,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 >From ba475e31713758724305acbff496cbe605888da8 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 23:00:17 +0800 Subject: [PATCH 03/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../Transforms/InstCombine/InstCombineCompares.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d1f141bcf0e7df7..c5e3ad8a55741fb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6400,9 +6400,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else if (C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { @@ -6418,9 +6416,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { // C != 0 && C != 1 @@ -6430,10 +6426,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - Constant::getIntegerValue( - X->getType(), - APInt(X->getType()->getScalarSizeInBits(), - static_cast(Pred2 == ICmpInst::ICMP_NE)))); + ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); } } >From 70a70fb44d0e628a1cf485e1767ada3eaaa26b0f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:30:11 +0800 Subject: [PATCH 04/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++ .../InstCombine/InstCombineCompares.cpp | 50 +++++++++++++------ 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 13877538f79de6d..38d40d1ec9a839e 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,6 +767,28 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } +/// Match a pattern, capturing the value if we match. +template struct capture_ty { + SubPattern_t SubPattern; + Class *&VR; + + capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} + + template bool match(ITy *V) { + if (auto *CV = dyn_cast(V)) { + VR = CV; + return SubPattern.match(V); + } + return false; + } +}; + +template +inline capture_ty m_Instruction(Instruction *&I, + const T &SubPattern) { + return capture_ty(SubPattern, I); +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c5e3ad8a55741fb..aca8611026ef1ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6380,53 +6380,71 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + // icmp eq/ne X, (zext/sext (icmp eq/ne X, C)) ICmpInst::Predicate Pred1, Pred2; const APInt *C; - // icmp eq/ne X, (zext (icmp eq/ne X, C)) + Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && - ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + m_Instruction(ExtI, + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { + bool IsSExt = ExtI->getOpcode() == Instruction::SExt; + bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { - // icmp eq X, (zext (icmp eq X, 0)) --> false - // icmp ne X, (zext (icmp eq X, 0)) --> true + // icmp eq X, (zext/sext (icmp eq X, 0)) --> false + // icmp ne X, (zext/sext (icmp eq X, 0)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } - } else if (C->isOne()) { + } else if (IsSExt ? 
C->isAllOnes() : C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true + // icmp eq X, (sext (icmp ne X, -1)) --> false + // icmp ne X, (sext (icmp ne X, -1)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { - // C != 0 && C != 1 - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != 1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != -1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); + ConstantInt::get(X->getType(), + Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); } } >From 418562d5dbd25167d3f9b2c61fb7265581ee99d4 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:39:23 +0800 Subject: [PATCH 05/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ----- .../InstCombine/InstCombineCompares.cpp | 6 +- .../Transforms/InstCombine/and-or-icmps.ll | 17 +--- .../test/Transforms/InstCombine/icmp-range.ll | 82 ++++++------------- 4 files changed, 31 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 38d40d1ec9a839e..13877538f79de6d 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,28 +767,6 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } -/// Match a pattern, capturing the value if we match. -template struct capture_ty { - SubPattern_t SubPattern; - Class *&VR; - - capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} - - template bool match(ITy *V) { - if (auto *CV = dyn_cast(V)) { - VR = CV; - return SubPattern.match(V); - } - return false; - } -}; - -template -inline capture_ty m_Instruction(Instruction *&I, - const T &SubPattern) { - return capture_ty(SubPattern, I); -} - /// Match a specified Value*. 
struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index aca8611026ef1ca..b72bde885de124b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6385,9 +6385,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { const APInt *C; Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_Instruction(ExtI, - m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_CombineAnd(m_Instruction(ExtI), + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 065dbf261e131bf..2c887d574d397f6 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,19 +366,10 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 -; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 -; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] -; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 -; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] -; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] -; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true -; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] +; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index f7efff9f675373a..3a40755384f2a21 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1164,7 +1164,7 @@ define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { } define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { -; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( ; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, @@ -1218,10 +1218,7 @@ define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { define i1 @icmp_ne_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1231,9 +1228,8 @@ define i1 @icmp_ne_sext_eq_zero(i32 %a) { define i1 @icmp_ne_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero( -; 
CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1244,10 +1240,7 @@ define i1 @icmp_ne_sext_ne_zero(i32 %a) { define i1 @icmp_eq_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1257,9 +1250,8 @@ define i1 @icmp_eq_sext_eq_zero(i32 %a) { define i1 @icmp_eq_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1270,9 +1262,8 @@ define i1 @icmp_eq_sext_ne_zero(i32 %a) { define i1 @icmp_ne_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1283,10 +1274,7 @@ define i1 @icmp_ne_sext_eq_allones(i32 %a) { define i1 @icmp_ne_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1296,9 +1284,8 @@ define i1 @icmp_ne_sext_ne_allones(i32 %a) { define i1 @icmp_eq_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1309,10 +1296,7 @@ define i1 @icmp_eq_sext_eq_allones(i32 %a) { define i1 @icmp_eq_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1322,9 +1306,7 @@ define i1 @icmp_eq_sext_ne_allones(i32 %a) { define i1 @icmp_ne_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1335,9 +1317,7 @@ define i1 @icmp_ne_sext_eq_otherwise(i32 
%a) { define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1348,9 +1328,7 @@ define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1361,9 +1339,7 @@ define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1374,10 +1350,7 @@ define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1387,9 +1360,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1400,9 +1372,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1413,10 +1384,7 @@ define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp ne <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1426,9 +1394,7 @@ define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ; CHECK-LABEL: 
@icmp_ne_sext_eq_otherwise_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, >From b7565ccc7090e15f6330da274225420691a4160d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 10:24:50 +0800 Subject: [PATCH 06/10] [InstCombine] Fix comments `zext` -> `sext`. NFC. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b72bde885de124b..c7b9366bfd45e41 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6437,10 +6437,10 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 // when C != 0 && C != -1: - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 + // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, ConstantInt::get(X->getType(), >From 5b5052403968d4b394b30779c8912a1e732e9998 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 24 Sep 2023 17:59:26 +0800 Subject: [PATCH 07/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c7b9366bfd45e41..7c7ab62c64dac89 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6390,6 +6390,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); + auto CreateRangeCheck = [&] { + Value *V1 = Constant::getNullValue(X->getType()); + Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1); + return BinaryOperator::Create( + Pred1 == ICmpInst::ICMP_EQ ? 
>From 5b5052403968d4b394b30779c8912a1e732e9998 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Sun, 24 Sep 2023 17:59:26 +0800
Subject: [PATCH 07/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))`

---
 .../InstCombine/InstCombineCompares.cpp | 39 ++++++++-----------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c7b9366bfd45e41..7c7ab62c64dac89 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6390,6 +6390,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
 m_APInt(C))))))) {
 bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
 bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
+ auto CreateRangeCheck = [&] {
+ Value *V1 = Constant::getNullValue(X->getType());
+ Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1);
+ return BinaryOperator::Create(
+ Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And,
+ Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2));
+ };
 if (C->isZero()) {
 if (Pred2 == ICmpInst::ICMP_EQ) {
 // icmp eq X, (zext/sext (icmp eq X, 0)) --> false
@@ -6397,17 +6404,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
 return replaceInstUsesWith(
 I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
 } else if (!IsSExt || HasOneUse) {
- // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2
- // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1
- // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2
- // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1
- return ICmpInst::Create(
- Instruction::ICmp,
- Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
- : ICmpInst::ICMP_ULT,
- IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1))
- : X,
- ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2));
+ // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
+ // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
+ // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
+ // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
+ return CreateRangeCheck();
 }
 } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
 if (Pred2 == ICmpInst::ICMP_NE) {
@@ -6418,17 +6419,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
 return replaceInstUsesWith(
 I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
 } else if (!IsSExt || HasOneUse) {
- // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2
- // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1
- // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2
- // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1
- return ICmpInst::Create(
- Instruction::ICmp,
- Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT
- : ICmpInst::ICMP_ULT,
- IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1))
- : X,
- ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2));
+ // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1
+ // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1
+ // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1
+ // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1
+ return CreateRangeCheck();
 }
 } else {
 // when C != 0 && C != 1:
>From 55d52b1f05004abe6c4187dc07437580c7f5aa73 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Sun, 24 Sep 2023 21:49:47 +0800
Subject: [PATCH 08/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))`

---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c7ab62c64dac89..9f2d0c8110f79e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6391,11 +6391,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
 bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
 bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
 auto CreateRangeCheck = [&] {
- Value *V1 = Constant::getNullValue(X->getType());
- Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1);
+ Value *CmpV1 =
+ Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType()));
+ Value *CmpV2 = Builder.CreateICmp(
+ Pred1, X, ConstantInt::get(X->getType(), IsSExt ?
-1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, - Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2)); + CmpV1, CmpV2); }; if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { >From c0d8f8193fa1620db1f84379f2316fcf4b401e4c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 1 Oct 2023 20:10:57 +0800 Subject: [PATCH 09/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 10 ++++---- .../Transforms/InstCombine/and-or-icmps.ll | 17 ++++++++++---- .../test/Transforms/InstCombine/icmp-range.ll | 23 +++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f2d0c8110f79e0..4fca8859dea7acc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6387,14 +6387,15 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { if (match(&I, m_c_ICmp(Pred1, m_Value(X), m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_APInt(C)))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); auto CreateRangeCheck = [&] { Value *CmpV1 = Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); Value *CmpV2 = Builder.CreateICmp( - Pred1, X, ConstantInt::get(X->getType(), IsSExt ? -1 : 1)); + Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, CmpV1, CmpV2); @@ -6440,8 +6441,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), - Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); + ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE + ? (IsSExt ? 
-1 : 1) + : 0)); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 2c887d574d397f6..065dbf261e131bf 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,10 +366,19 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 +; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 +; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] +; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 +; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] +; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] +; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true +; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 3a40755384f2a21..79790b7458d4219 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1403,6 +1403,29 @@ define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ret <2 x i1> %cmp1 } +define i1 @icmp_ne_sext_ne_zero_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_zero_i128( +; CHECK-NEXT: [[TMP1:%.*]] = add i128 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i128 [[TMP1]], -2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 0 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_otherwise_i128( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i128 [[A:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 2 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From 2285a2c6b8e66cf35aed6151c61e841f1349817f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 6 Oct 2023 17:17:53 +0800 Subject: [PATCH 10/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` Add negative tests --- .../test/Transforms/InstCombine/icmp-range.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 17903be937057ab..7af06e03fd4b2a9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1426,6 +1426,85 @@ define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { ret i1 %cmp1 } +; Negative tests with non-equality predicates +define i1 @icmp_ne_sext_sgt_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_sgt_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp sgt i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + ; tests from PR59555 define i1 @isFloat(i64 %0) { ; CHECK-LABEL: @isFloat( From lldb-commits at lists.llvm.org Fri Oct 6 03:00:20 2023 From: lldb-commits at lists.llvm.org (Nikita Popov via lldb-commits) Date: Fri, 06 Oct 2023 03:00:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651fdab4.170a0220.43a4a.aa5a@mx.google.com> nikic wrote: Looks like incorrect conflict 
resolution in the last merge. https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Fri Oct 6 03:27:54 2023 From: lldb-commits at lists.llvm.org (antoine moynault via lldb-commits) Date: Fri, 06 Oct 2023 03:27:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651fe12a.170a0220.693f0.a879@mx.google.com> antmox wrote: Hello! It looks like this broke lldb-aarch64-windows bot: https://lab.llvm.org/buildbot/#/builders/219/builds/6130 Could you please look at this ? https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:38 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:38 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1ce.170a0220.31233.a81c@mx.google.com> https://github.com/omjavaid edited https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:39 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:39 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1cf.170a0220.1c99a.b46a@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. 
If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore +``svg`` and ``vg`` show the same value. 
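As a concrete reading of the quoted example, here is a minimal sketch (illustrative only, assuming only what the quoted text states: svcr bit 1 is streaming mode, bit 2 is za, and a granule is 8 bytes):

// Decode the values shown above: svcr = 0x3, vg = svg = 2 granules.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t svcr = 0x3, vg = 2, svg = 2;
  bool streaming = (svcr & 0x1) != 0; // streaming mode bit
  bool za_active = (svcr & 0x2) != 0; // za enable bit
  uint64_t svl = svg * 8;             // streaming vector length in bytes
  // In streaming mode the active vector length (vg) matches svg.
  assert(streaming && za_active && svl == 16 && vg * 8 == svl);
  return 0;
}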
+ +Changing the Streaming Vector Length +.................................... + +To reduce complexity for LLDB, ``svg`` is read only. This means that you can +only change the streaming vector length using LLDB when the debugee is in +streaming mode. + +As for non-streaming SVE, doing so will essentially make the content of the SVE +registers undefined. It will also disable ZA, which follows what the Linux +Kernel does. + +Inactive ZA Handling +.................... + +LLDB does not handle registers that can come and go at runtime (SVE changes +size but it does not dissappear). Therefore when ``za`` is not enabled, LLDB +will return a block of 0s instead. This block will match the expected size of +``za``:: + + (lldb) register read za svg svcr + za = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 <...> } + svg = 0x0000000000000002 + svcr = 0x0000000000000001 + +Note that ``svcr`` bit 2 is not set, meaning ``za`` is inactive. + +If you were to write to ``za`` from LLDB, ``za`` will be made active. There is +no way from within LLDB to reverse this change. As for changing the vector +length, the debugee could still do something that would disable ``za`` again. + +If you want to know whether ``za`` is active or not, refer to bit 2 of the +``svcr`` register. + +ZA Register Presentation +........................ + +As for SVE, LLDB does not know how you will use ``za``. At any given time an ---------------- omjavaid wrote: Similar to SVE https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:40 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1d0.050a0220.6b819.2ee8@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. 
Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the ---------------- omjavaid wrote: LLDB makes no attempt to predict how an SVE Z register should be visualized. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:40 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1d0.170a0220.92e38.aa14@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. ---------------- omjavaid wrote: Elsewhere in start and end of this sentence seems to be a typo. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:43 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1d3.170a0220.b8ae9.b212@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. 
+ +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you ---------------- omjavaid wrote: vg register also has write access to update SVE vector length during a debug session. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:43 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1d3.170a0220.4ee71.a7dc@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. 
Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. 
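The sizes in this register list compose directly; a minimal sketch (illustrative only, using the 16-byte streaming vector length from the example that follows):

// Derive the storage sizes named above from svg.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t svg = 2;              // granules of 8 bytes each
  uint64_t svl = svg * 8;        // streaming vector length: 16 bytes
  uint64_t z_bytes = svl;        // each streaming z register
  uint64_t p_bytes = svl / 8;    // each predicate register
  uint64_t za_bytes = svl * svl; // za: svl rows of svl bytes
  assert(z_bytes == 16 && p_bytes == 2 && za_bytes == 256);
  return 0;
}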
+
+In the example below, the streaming vector length is 16 bytes::
+
+  Scalable Vector Extension Registers:
+        vg = 0x0000000000000002
+        z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> }
+        <...>
+        p0 = {0xff 0xff}
+        <...>
+       ffr = {0xff 0xff}
+
+  <...>
+
+  Thread Local Storage Registers:
+     tpidr = 0x0000fffff7ff4320
+    tpidr2 = 0x1122334455667788
+
+  Scalable Matrix Array Storage Registers:
+        za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> }
+
+  Scalable Matrix Extension Registers:
+       svg = 0x0000000000000002
+      svcr = 0x0000000000000003
+
+Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore
----------------
omjavaid wrote:

I guess what I was confused about above is explained here, but I'll leave the comment as it is in case you consider a reshuffle.

https://github.com/llvm/llvm-project/pull/66767

From lldb-commits at lists.llvm.org  Fri Oct  6 03:30:45 2023
From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits)
Date: Fri, 06 Oct 2023 03:30:45 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767)
In-Reply-To:
Message-ID: <651fe1d5.a70a0220.6929a.2c90@mx.google.com>

================
@@ -0,0 +1,190 @@
+Using LLDB On AArch64 Linux
+===========================
+
+This page explains the details of debugging certain AArch64 extensions using
+LLDB. If something is not mentioned here, it likely works as you would expect.
+
+This is not a replacement for ptrace and Linux Kernel documentation. This covers
+how LLDB has chosen to use those things and how that effects your experience as
+a user.
+
+Scalable Vector Extension (SVE)
+-------------------------------
+
+See `here `__
+to learn about the extension and `here `__
+for the Linux Kernel's handling of it.
+
+In LLDB you will be able to see the following new registers:
+
+* ``z0-z31`` vector registers, each one has size equal to the vector length.
+* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector
+  length. Making each one vector length / 8 sized.
+* ``ffr`` the first fault register, same size as a predicate register.
+* ``vg``, the vector length in "granules". Each granule is 8 bytes.
+
+.. code-block::
+
+  Scalable Vector Extension Registers:
+        vg = 0x0000000000000002
+        z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> }
+        <...>
+        p0 = {0xff 0xff}
+        <...>
+       ffr = {0xff 0xff}
+
+The example above has a vector length of 16 bytes. Within LLDB you will always
+see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16).
+Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes)
+elsewhere. Where you see "vl", it is in bytes.
+
+Changing the Vector Length
+..........................
+
+While you can count the size of a P or Z register, it is intended that ``vg`` be
+used to find the current vector length.
+
+vg can be written. Writing the current vector length changes nothing. If you
+increase the vector length, the registers will likely be reset to 0. If you
+decrease it, LLDB will truncate the Z registers but everything else will be reset
+to 0.
+
+Generally you should not assume that SVE state after changing the vector length
+is in any way the same as it was previously. If you need to do it, do it before
+a function's first use of SVE.
+
+Z Register Presentation
+.......................
+
+LLDB makes no attempt to predict how an SVE Z register will be used.
Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore +``svg`` and ``vg`` show the same value. + +Changing the Streaming Vector Length +.................................... + +To reduce complexity for LLDB, ``svg`` is read only. 
This means that you can +only change the streaming vector length using LLDB when the debugee is in +streaming mode. + +As for non-streaming SVE, doing so will essentially make the content of the SVE ---------------- omjavaid wrote: out of context of this review: but is there a possibility that we see in future ZA being available while SVE registers are not? https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:30:45 2023 From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits) Date: Fri, 06 Oct 2023 03:30:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <651fe1d5.170a0220.151b1.f5f1@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. 
In this state LLDB will still show the
+SVE registers however the values are simply the FPU values zero extended up to
+the vector length.
+
+On first access to SVE, the process goes into SVE mode. Now the Z values are
+in the real Z registers.
+
+You can also trigger this with LLDB by writing to an SVE register. Note that
+there is no way to undo this change from within LLDB. However, the debugee
+itself could do something to end up back in SIMD mode.
+
+Expression evaluation
+.....................
+
+If you evaluate an expression, all SVE state is saved prior to, and restored
+after the expression has been evaluated. Including the register values and
+vector length.
+
+Scalable Matrix Extension (SME)
+-------------------------------
+
+See `here `__
+to learn about the extension and `here `__
+for the Linux Kernel's handling of it.
+
+SME adds a "Streaming Mode" to SVE. This mode has its own vector length.
+
+In LLDB you will see the following new registers:
+
+* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI.
+  This is not scalable, just pointer sized aka 64 bit.
+* ``z0-z31`` streaming SVE registers. These have the same names as the
+  non-streaming registers and therefore you will only see the active set in
+  LLDB. You cannot read or write the inactive mode's registers. Their size
+  is the same as the streaming vector length.
+* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix
+  Extension". This is a square made up of rows of length equal to the streaming
+  vector length (svl). Meaning that the total size is svl * svl.
+* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE.
+  Except that where ``vg`` shows the length for the active mode, ``svg`` will
+  always show the streaming vector length, even in non-streaming mode. This
+  register is read only.
----------------
omjavaid wrote:

I am wondering what is meant by "acts the same as vg"? I am a little confused about whether svg and vg are the same in streaming SVE mode, and also when ZA is enabled. I guess add a line explaining that relationship here.

https://github.com/llvm/llvm-project/pull/66767

From lldb-commits at lists.llvm.org  Fri Oct  6 03:30:46 2023
From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits)
Date: Fri, 06 Oct 2023 03:30:46 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767)
In-Reply-To:
Message-ID: <651fe1d6.630a0220.89274.93a2@mx.google.com>

https://github.com/omjavaid commented:

The document looks great, but I have left some comments. They are mostly opinions; you may consider some or all of them while updating this document.
https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 03:41:36 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 03:41:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection (PR #68405) Message-ID: https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/68405 Fixes misleading comment introduced in `f74aaca63202cabb512c78fe19196ff348d436a8` >From 90db577879200ac4bd33f1078626ca54d8d1dc62 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 6 Oct 2023 11:38:03 +0100 Subject: [PATCH] [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection Fixes misleading comment introduced in `f74aaca63202cabb512c78fe19196ff348d436a8` --- lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 6e13626d2894313..d0065896b0d2292 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2878,8 +2878,8 @@ void DWARFASTParserClang::ParseSingleMember( attrs.accessibility = eAccessNone; // Handle static members, which are typically members without - // locations. However, GCC *never* emits DW_AT_data_member_location - // for static data members of unions. + // locations. However, GCC doesn't emit DW_AT_data_member_location + // for any union members (regardless of linkage). // Non-normative text pre-DWARFv5 recommends marking static // data members with an DW_AT_external flag. Clang emits this consistently // whereas GCC emits it only for static data members if not part of an From lldb-commits at lists.llvm.org Fri Oct 6 03:41:42 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 03:41:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] a233a49 - [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection (#68405) Message-ID: <651fe466.170a0220.d59a.b3ec@mx.google.com> Author: Michael Buch Date: 2023-10-06T11:41:37+01:00 New Revision: a233a49b6037240adcde2dab74571fec13354a08 URL: https://github.com/llvm/llvm-project/commit/a233a49b6037240adcde2dab74571fec13354a08 DIFF: https://github.com/llvm/llvm-project/commit/a233a49b6037240adcde2dab74571fec13354a08.diff LOG: [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection (#68405) Fixes misleading comment introduced in `f74aaca63202cabb512c78fe19196ff348d436a8` Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 6e13626d2894313..d0065896b0d2292 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2878,8 +2878,8 @@ void DWARFASTParserClang::ParseSingleMember( attrs.accessibility = eAccessNone; // Handle static members, which are typically members without - // locations. However, GCC *never* emits DW_AT_data_member_location - // for static data members of unions. + // locations. 
However, GCC doesn't emit DW_AT_data_member_location + // for any union members (regardless of linkage). // Non-normative text pre-DWARFv5 recommends marking static // data members with an DW_AT_external flag. Clang emits this consistently // whereas GCC emits it only for static data members if not part of an From lldb-commits at lists.llvm.org Fri Oct 6 03:41:43 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 03:41:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection (PR #68405) In-Reply-To: Message-ID: <651fe467.170a0220.a3184.b06a@mx.google.com> https://github.com/Michael137 closed https://github.com/llvm/llvm-project/pull/68405 From lldb-commits at lists.llvm.org Fri Oct 6 03:42:10 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 03:42:10 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection (PR #68405) In-Reply-To: Message-ID: <651fe482.630a0220.17990.9897@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68405 From lldb-commits at lists.llvm.org Fri Oct 6 03:42:33 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 03:42:33 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651fe499.620a0220.67b20.2daa@mx.google.com> Michael137 wrote: > Hello! It looks like this broke lldb-aarch64-windows bot: https://lab.llvm.org/buildbot/#/builders/219/builds/6130 Could you please look at this ? Yup! Was in the process of fixing. I think we just skip this test on windows https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Fri Oct 6 03:42:37 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 03:42:37 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang][NFC] Fix comment regarding static data member detection (PR #68405) In-Reply-To: Message-ID: <651fe49d.050a0220.1ad2a.2daa@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes Fixes misleading comment introduced in `f74aaca63202cabb512c78fe19196ff348d436a8` (https://github.com/llvm/llvm-project/pull/68300) --- Full diff: https://github.com/llvm/llvm-project/pull/68405.diff 1 Files Affected: - (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp (+2-2) ``````````diff diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 6e13626d2894313..d0065896b0d2292 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2878,8 +2878,8 @@ void DWARFASTParserClang::ParseSingleMember( attrs.accessibility = eAccessNone; // Handle static members, which are typically members without - // locations. However, GCC *never* emits DW_AT_data_member_location - // for static data members of unions. + // locations. However, GCC doesn't emit DW_AT_data_member_location + // for any union members (regardless of linkage). // Non-normative text pre-DWARFv5 recommends marking static // data members with an DW_AT_external flag. Clang emits this consistently // whereas GCC emits it only for static data members if not part of an ``````````
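For a concrete picture of the union case the revised comment describes, here is a hypothetical example (the union and member names are illustrative, not from the PR):

// With GCC, no DW_AT_data_member_location is emitted for either member of
// U, so LLDB tells the static member apart via its DW_AT_declaration (and,
// where present, DW_AT_external) attribute rather than a missing location.
union U {
  int val;                        // non-static member at offset 0
  static constexpr int sVal = 42; // static member: declaration-only DIE
};

int main() {
  U u{7};
  return u.val + U::sVal;
}

Checking the declaration attribute works for both compilers because a static data member's in-class DIE is always a declaration, whatever the linkage of the enclosing type's members.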
https://github.com/llvm/llvm-project/pull/68405

From lldb-commits at lists.llvm.org  Fri Oct  6 03:53:46 2023
From: lldb-commits at lists.llvm.org (Omair Javaid via lldb-commits)
Date: Fri, 06 Oct 2023 03:53:46 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb][AArch64] Invalidate SVG prior to reconfiguring ZA regdef (PR #66768)
In-Reply-To:
Message-ID: <651fe73a.a70a0220.60cd8.2d1b@mx.google.com>

================
@@ -783,6 +783,11 @@ void GDBRemoteRegisterContext::AArch64Reconfigure() {
 std::optional<uint64_t> svg_reg_value;
 const RegisterInfo *svg_reg_info = m_reg_info_sp->GetRegisterInfo("svg");
 if (svg_reg_info) {
+ // When vg is written it is automatically made invalid. Writing vg will also
+ // change svg if we're in streaming mode but it will not be made invalid
+ // so do this manually so the following read gets the latest svg value.
----------------
omjavaid wrote:

I am wondering whether we should find a way to make svg and vg interdependent, to make sure they are invalidated together whenever an update happens. Do you see a way within the RegisterInfo class to link registers together? I don't fully remember, but I believe this could be done.

https://github.com/llvm/llvm-project/pull/66768

From lldb-commits at lists.llvm.org  Fri Oct  6 04:07:51 2023
From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits)
Date: Fri, 06 Oct 2023 04:07:51 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408)
Message-ID:

https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/68408

These tests never worked since their introduction in https://github.com/llvm/llvm-project/pull/68300

>From c5f0e050d8bfaad588693c99b2f680084a956718 Mon Sep 17 00:00:00 2001
From: Michael Buch
Date: Fri, 6 Oct 2023 12:00:28 +0100
Subject: [PATCH 1/3] [lldb][test] TestCppUnionStaticMembers.py: split out
 assertions that fail on some platforms

Split out the assertions that fail on Windows in preparation to
XFAILing them.
Drive-by change: * Add a missing `self.build()` call in `test_union_in_anon_namespace` * Fix formatting --- .../TestCppUnionStaticMembers.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py index 47166636b12647c..01e8a25276bc43c 100644 --- a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -8,14 +8,14 @@ import lldbsuite.test.lldbutil as lldbutil class CppUnionStaticMembersTestCase(TestBase): - def test(self): + def test_print_union(self): """Tests that frame variable and expr work - for union static data members""" + for union with static data members""" self.build() (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( self, "return 0", lldb.SBFileSpec("main.cpp") - ) + ) self.expect("frame variable foo", substrs=["val = 42"]) self.expect("frame variable bar", substrs=["val = 137"]) @@ -27,6 +27,15 @@ def test(self): name="val", value="137" )]) + def test_expr_union_static_members(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( name="val", value="42" @@ -37,6 +46,12 @@ def test_union_in_anon_namespace(self): """Tests that frame variable and expr work for union static data members in anonymous namespaces""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( name="val", value="137" >From b5fc63d9372b41ac4bdd3a95a40907b6ec32225d Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 6 Oct 2023 12:03:28 +0100 Subject: [PATCH 2/3] [lldb][lldbsuite] Add expectedFailureWindows decorator --- lldb/packages/Python/lldbsuite/test/decorators.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 6194862f8fe60f9..9e0197fdcf66644 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -510,6 +510,9 @@ def expectedFailureAndroid(bugnumber=None, api_levels=None, archs=None): def expectedFailureNetBSD(bugnumber=None): return expectedFailureOS(["netbsd"], bugnumber) +def expectedFailureWindows(bugnumber=None): + return expectedFailureOS(["windows"], bugnumber) + # TODO: This decorator does not do anything. Remove it. 
def expectedFlakey(expected_fn, bugnumber=None): >From eac7b78ad0e655da2cf997690a55e8a928657e47 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 6 Oct 2023 12:04:18 +0100 Subject: [PATCH 3/3] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows These tests never worked since their introduction in https://github.com/llvm/llvm-project/pull/68300 --- .../cpp/union-static-data-members/TestCppUnionStaticMembers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py index 01e8a25276bc43c..4dcc671f79aa98f 100644 --- a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -27,6 +27,7 @@ def test_print_union(self): name="val", value="137" )]) + @expectedFailureWindows def test_expr_union_static_members(self): """Tests that frame variable and expr work for union static data members""" From lldb-commits at lists.llvm.org Fri Oct 6 04:08:33 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 04:08:33 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][DWARFASTParserClang] Check DW_AT_declaration to determine static data members (PR #68300) In-Reply-To: Message-ID: <651feab1.a70a0220.2e816.2ec8@mx.google.com> Michael137 wrote: @antmox Addressed in https://github.com/llvm/llvm-project/pull/68408 https://github.com/llvm/llvm-project/pull/68300 From lldb-commits at lists.llvm.org Fri Oct 6 04:09:00 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 04:09:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <651feacc.170a0220.a71df.b035@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-lldb
Changes These tests never worked since their introduction in https://github.com/llvm/llvm-project/pull/68300 --- Full diff: https://github.com/llvm/llvm-project/pull/68408.diff 2 Files Affected: - (modified) lldb/packages/Python/lldbsuite/test/decorators.py (+3) - (modified) lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py (+19-3) ``````````diff diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 6194862f8fe60f9..9e0197fdcf66644 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -510,6 +510,9 @@ def expectedFailureAndroid(bugnumber=None, api_levels=None, archs=None): def expectedFailureNetBSD(bugnumber=None): return expectedFailureOS(["netbsd"], bugnumber) +def expectedFailureWindows(bugnumber=None): + return expectedFailureOS(["windows"], bugnumber) + # TODO: This decorator does not do anything. Remove it. def expectedFlakey(expected_fn, bugnumber=None): diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py index 47166636b12647c..4dcc671f79aa98f 100644 --- a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -8,14 +8,14 @@ import lldbsuite.test.lldbutil as lldbutil class CppUnionStaticMembersTestCase(TestBase): - def test(self): + def test_print_union(self): """Tests that frame variable and expr work - for union static data members""" + for union with static data members""" self.build() (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( self, "return 0", lldb.SBFileSpec("main.cpp") - ) + ) self.expect("frame variable foo", substrs=["val = 42"]) self.expect("frame variable bar", substrs=["val = 137"]) @@ -27,6 +27,16 @@ def test(self): name="val", value="137" )]) + @expectedFailureWindows + def test_expr_union_static_members(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( name="val", value="42" @@ -37,6 +47,12 @@ def test_union_in_anon_namespace(self): """Tests that frame variable and expr work for union static data members in anonymous namespaces""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( name="val", value="137" ``````````
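For reference, the new `expectedFailureWindows` helper mirrors the existing `expectedFailureNetBSD`: it can be applied bare (as in the diff above) or given a bug-tracker reference through its `bugnumber` parameter. A minimal usage sketch follows; the class name, test names, and bug URL are hypothetical, and it assumes the conventional wildcard import used by LLDB API tests:

``````````python
from lldbsuite.test.decorators import *  # assumed: conventional API-test import
from lldbsuite.test.lldbtest import TestBase


class SketchTestCase(TestBase):
    # Bare form, exactly as applied in the patch above.
    @expectedFailureWindows
    def test_known_windows_failure(self):
        self.build()
        # ... run to a breakpoint and make the assertions that XFAIL on Windows.

    # With an illustrative bug number, matching the bugnumber parameter.
    @expectedFailureWindows("llvm.org/pr00000")
    def test_tracked_windows_failure(self):
        self.build()
``````````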
https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 04:09:50 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 04:09:50 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <651feafe.170a0220.8b67f.afbf@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 04:10:28 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 04:10:28 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <651feb24.170a0220.bffe7.af95@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 04:10:47 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 04:10:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <651feb37.a70a0220.4e52e.2e80@mx.google.com> https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 04:19:00 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 04:19:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <651fed24.630a0220.c0e63.1118@mx.google.com> github-actions[bot] wrote: :warning: Python code formatter, darker found issues in your code. :warning:
You can test this locally with the following command: ``````````bash darker --check --diff -r a16f6462d756804276d4b39267b3c19bcd6949fe..eac7b78ad0e655da2cf997690a55e8a928657e47 lldb/packages/Python/lldbsuite/test/decorators.py lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py ``````````
View the diff from darker here. ``````````diff --- packages/Python/lldbsuite/test/decorators.py 2023-10-06 11:03:28.000000 +0000 +++ packages/Python/lldbsuite/test/decorators.py 2023-10-06 11:18:52.873861 +0000 @@ -508,10 +508,11 @@ def expectedFailureNetBSD(bugnumber=None): return expectedFailureOS(["netbsd"], bugnumber) + def expectedFailureWindows(bugnumber=None): return expectedFailureOS(["windows"], bugnumber) # TODO: This decorator does not do anything. Remove it. --- test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py 2023-10-06 11:04:22.000000 +0000 +++ test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py 2023-10-06 11:18:52.924654 +0000 @@ -8,11 +8,11 @@ import lldbsuite.test.lldbutil as lldbutil class CppUnionStaticMembersTestCase(TestBase): def test_print_union(self): """Tests that frame variable and expr work - for union with static data members""" + for union with static data members""" self.build() (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( self, "return 0", lldb.SBFileSpec("main.cpp") ) @@ -28,11 +28,11 @@ )]) @expectedFailureWindows def test_expr_union_static_members(self): """Tests that frame variable and expr work - for union static data members""" + for union static data members""" self.build() (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( self, "return 0", lldb.SBFileSpec("main.cpp") ) ``````````
https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 04:29:25 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Fri, 06 Oct 2023 04:29:25 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651fef95.170a0220.7aad3.b158@mx.google.com> https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/65852 >From d9d8bcbb98e8f5aecb9733329389d61a489bd731 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 9 Sep 2023 23:07:29 +0800 Subject: [PATCH 01/10] [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 62 ++++++ .../test/Transforms/InstCombine/icmp-range.ll | 181 ++++++++++++++++++ 2 files changed, 243 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9fdc46fec631679..837b8e6d2619989 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6309,7 +6309,69 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + ICmpInst::Predicate Pred1, Pred2; const APInt *C; + // icmp eq/ne X, (zext (icmp eq/ne X, C)) + if (match(&I, m_c_ICmp(Pred1, m_Value(X), + m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + if (C->isZero()) { + if (Pred2 == ICmpInst::ICMP_EQ) { + // icmp eq X, (zext (icmp eq X, 0)) --> false + // icmp ne X, (zext (icmp eq X, 0)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + } + } else if (C->isOne()) { + if (Pred2 == ICmpInst::ICMP_NE) { + // icmp eq X, (zext (icmp ne X, 1)) --> false + // icmp ne X, (zext (icmp ne X, 1)) --> true + return replaceInstUsesWith( + I, + Constant::getIntegerValue( + I.getType(), + APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + } else { + // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 + // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + return ICmpInst::Create( + Instruction::ICmp, + Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT + : ICmpInst::ICMP_ULT, + X, + Constant::getIntegerValue( + X->getType(), APInt(X->getType()->getScalarSizeInBits(), + Pred1 == ICmpInst::ICMP_NE ? 
1 : 2))); + } + } else { + // C != 0 && C != 1 + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + return ICmpInst::Create( + Instruction::ICmp, Pred1, X, + Constant::getIntegerValue( + X->getType(), + APInt(X->getType()->getScalarSizeInBits(), + static_cast(Pred2 == ICmpInst::ICMP_NE)))); + } + } + if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) && match(I.getOperand(1), m_APInt(C)) && X->getType()->isIntOrIntVectorTy(1) && diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 4281e09cb0309c8..15424fce33fdeea 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1034,6 +1034,187 @@ define i1 @icmp_ne_bool_1(ptr %ptr) { ret i1 %cmp } +define i1 @icmp_ne_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_zero( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp eq i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_zero(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_zero( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_eq_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_one( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_one(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_one( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp ne i32 %a, 1 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + 
+define i1 @icmp_eq_zext_eq_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_eq_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp eq i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { +; CHECK-LABEL: @icmp_eq_zext_ne_non_boolean( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = zext i1 %cmp to i32 + %cmp1 = icmp eq i32 %conv, %a + ret i1 %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_one_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_ne_one_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_ne_one_vec( +; CHECK-NEXT: ret <2 x i1> +; + %cmp = icmp ne <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + +define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { +; CHECK-LABEL: @icmp_ne_zext_eq_non_boolean_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %cmp = icmp eq <2 x i32> %a, + %conv = zext <2 x i1> %cmp to <2 x i32> + %cmp1 = icmp ne <2 x i32> %conv, %a + ret <2 x i1> %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From bf79e8624a1578c65ca3adc4c3c95512c0e18d53 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 22:36:02 +0800 Subject: [PATCH 02/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d0b62c17ec94358..d1f141bcf0e7df7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6391,10 +6391,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp eq X, 0)) --> false // icmp ne X, (zext (icmp eq X, 0)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 @@ -6412,10 +6409,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true return replaceInstUsesWith( - I, - Constant::getIntegerValue( - I.getType(), - APInt(1U, static_cast(Pred1 == ICmpInst::ICMP_NE)))); + I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 >From ba475e31713758724305acbff496cbe605888da8 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Sep 2023 23:00:17 +0800 Subject: [PATCH 03/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../Transforms/InstCombine/InstCombineCompares.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d1f141bcf0e7df7..c5e3ad8a55741fb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6400,9 +6400,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else if (C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { @@ -6418,9 +6416,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, X, - Constant::getIntegerValue( - X->getType(), APInt(X->getType()->getScalarSizeInBits(), - Pred1 == ICmpInst::ICMP_NE ? 1 : 2))); + ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { // C != 0 && C != 1 @@ -6430,10 +6426,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - Constant::getIntegerValue( - X->getType(), - APInt(X->getType()->getScalarSizeInBits(), - static_cast(Pred2 == ICmpInst::ICMP_NE)))); + ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); } } >From 70a70fb44d0e628a1cf485e1767ada3eaaa26b0f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:30:11 +0800 Subject: [PATCH 04/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++ .../InstCombine/InstCombineCompares.cpp | 50 +++++++++++++------ 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 13877538f79de6d..38d40d1ec9a839e 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,6 +767,28 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } +/// Match a pattern, capturing the value if we match. +template struct capture_ty { + SubPattern_t SubPattern; + Class *&VR; + + capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} + + template bool match(ITy *V) { + if (auto *CV = dyn_cast(V)) { + VR = CV; + return SubPattern.match(V); + } + return false; + } +}; + +template +inline capture_ty m_Instruction(Instruction *&I, + const T &SubPattern) { + return capture_ty(SubPattern, I); +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c5e3ad8a55741fb..aca8611026ef1ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6380,53 +6380,71 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); + // icmp eq/ne X, (zext/sext (icmp eq/ne X, C)) ICmpInst::Predicate Pred1, Pred2; const APInt *C; - // icmp eq/ne X, (zext (icmp eq/ne X, C)) + Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_ZExt(m_ICmp(Pred2, m_Deferred(X), m_APInt(C))))) && - ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { + m_Instruction(ExtI, + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { + bool IsSExt = ExtI->getOpcode() == Instruction::SExt; + bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { - // icmp eq X, (zext (icmp eq X, 0)) --> false - // icmp ne X, (zext (icmp eq X, 0)) --> true + // icmp eq X, (zext/sext (icmp eq X, 0)) --> false + // icmp ne X, (zext/sext (icmp eq X, 0)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } - } else if (C->isOne()) { + } else if (IsSExt ? 
C->isAllOnes() : C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { // icmp eq X, (zext (icmp ne X, 1)) --> false // icmp ne X, (zext (icmp ne X, 1)) --> true + // icmp eq X, (sext (icmp ne X, -1)) --> false + // icmp ne X, (sext (icmp ne X, -1)) --> true return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); - } else { + } else if (!IsSExt || HasOneUse) { // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 + // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2 + // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1 return ICmpInst::Create( Instruction::ICmp, Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT, - X, + IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) + : X, ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); } } else { - // C != 0 && C != 1 - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != 1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 + // when C != 0 && C != -1: + // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), Pred2 == ICmpInst::ICMP_NE ? 1 : 0)); + ConstantInt::get(X->getType(), + Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); } } >From 418562d5dbd25167d3f9b2c61fb7265581ee99d4 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 03:39:23 +0800 Subject: [PATCH 05/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/include/llvm/IR/PatternMatch.h | 22 ----- .../InstCombine/InstCombineCompares.cpp | 6 +- .../Transforms/InstCombine/and-or-icmps.ll | 17 +--- .../test/Transforms/InstCombine/icmp-range.ll | 82 ++++++------------- 4 files changed, 31 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 38d40d1ec9a839e..13877538f79de6d 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -767,28 +767,6 @@ m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } -/// Match a pattern, capturing the value if we match. -template struct capture_ty { - SubPattern_t SubPattern; - Class *&VR; - - capture_ty(const SubPattern_t &SP, Class *&V) : SubPattern(SP), VR(V) {} - - template bool match(ITy *V) { - if (auto *CV = dyn_cast(V)) { - VR = CV; - return SubPattern.match(V); - } - return false; - } -}; - -template -inline capture_ty m_Instruction(Instruction *&I, - const T &SubPattern) { - return capture_ty(SubPattern, I); -} - /// Match a specified Value*. 
struct specificval_ty { const Value *Val; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index aca8611026ef1ca..b72bde885de124b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6385,9 +6385,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { const APInt *C; Instruction *ExtI; if (match(&I, m_c_ICmp(Pred1, m_Value(X), - m_Instruction(ExtI, - m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_CombineAnd(m_Instruction(ExtI), + m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), + m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); if (C->isZero()) { diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 065dbf261e131bf..2c887d574d397f6 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,19 +366,10 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 -; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 -; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] -; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 -; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] -; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] -; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true -; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] +; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index f7efff9f675373a..3a40755384f2a21 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1164,7 +1164,7 @@ define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { } define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { -; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec +; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( ; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, @@ -1218,10 +1218,7 @@ define <2 x i1> @icmp_ne_zext_eq_non_boolean_vec(<2 x i32> %a) { define i1 @icmp_ne_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1231,9 +1228,8 @@ define i1 @icmp_ne_sext_eq_zero(i32 %a) { define i1 @icmp_ne_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero( -; 
CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1244,10 +1240,7 @@ define i1 @icmp_ne_sext_ne_zero(i32 %a) { define i1 @icmp_eq_sext_eq_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp eq i32 %a, 0 %conv = sext i1 %cmp to i32 @@ -1257,9 +1250,8 @@ define i1 @icmp_eq_sext_eq_zero(i32 %a) { define i1 @icmp_eq_sext_ne_zero(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 0 @@ -1270,9 +1262,8 @@ define i1 @icmp_eq_sext_ne_zero(i32 %a) { define i1 @icmp_ne_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1283,10 +1274,7 @@ define i1 @icmp_ne_sext_eq_allones(i32 %a) { define i1 @icmp_ne_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1296,9 +1284,8 @@ define i1 @icmp_ne_sext_ne_allones(i32 %a) { define i1 @icmp_eq_sext_eq_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, -1 @@ -1309,10 +1296,7 @@ define i1 @icmp_eq_sext_eq_allones(i32 %a) { define i1 @icmp_eq_sext_ne_allones(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_allones( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 false ; %cmp = icmp ne i32 %a, -1 %conv = sext i1 %cmp to i32 @@ -1322,9 +1306,7 @@ define i1 @icmp_eq_sext_ne_allones(i32 %a) { define i1 @icmp_ne_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1335,9 +1317,7 @@ define i1 @icmp_ne_sext_eq_otherwise(i32 
%a) { define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1348,9 +1328,7 @@ define i1 @icmp_ne_sext_ne_otherwise(i32 %a) { define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_eq_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp eq i32 %a, 2 @@ -1361,9 +1339,7 @@ define i1 @icmp_eq_sext_eq_otherwise(i32 %a) { define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { ; CHECK-LABEL: @icmp_eq_sext_ne_otherwise( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[CMP1]] ; %cmp = icmp ne i32 %a, 2 @@ -1374,10 +1350,7 @@ define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp eq <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1387,9 +1360,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1400,9 +1372,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1413,10 +1384,7 @@ define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] -; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp ne <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1426,9 +1394,7 @@ define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ; CHECK-LABEL: 
@icmp_ne_sext_eq_otherwise_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[CONV]], [[A]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, >From b7565ccc7090e15f6330da274225420691a4160d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Sep 2023 10:24:50 +0800 Subject: [PATCH 06/10] [InstCombine] Fix comments `zext` -> `sext`. NFC. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b72bde885de124b..c7b9366bfd45e41 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6437,10 +6437,10 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 // when C != 0 && C != -1: - // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 - // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, -1 - // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 - // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, -1 + // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0 + // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1 + // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0 + // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, ConstantInt::get(X->getType(), >From 5b5052403968d4b394b30779c8912a1e732e9998 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 24 Sep 2023 17:59:26 +0800 Subject: [PATCH 07/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c7b9366bfd45e41..7c7ab62c64dac89 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6390,6 +6390,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { m_APInt(C))))))) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); + auto CreateRangeCheck = [&] { + Value *V1 = Constant::getNullValue(X->getType()); + Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1); + return BinaryOperator::Create( + Pred1 == ICmpInst::ICMP_EQ ? 
Instruction::Or : Instruction::And, + Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2)); + }; if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { // icmp eq X, (zext/sext (icmp eq X, 0)) --> false @@ -6397,17 +6404,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else if (!IsSExt || HasOneUse) { - // icmp eq X, (zext (icmp ne X, 0)) --> icmp ult X, 2 - // icmp ne X, (zext (icmp ne X, 0)) --> icmp ugt X, 1 - // icmp eq X, (sext (icmp ne X, 0)) --> icmp ult (X + 1), 2 - // icmp ne X, (sext (icmp ne X, 0)) --> icmp ugt (X + 1), 1 - return ICmpInst::Create( - Instruction::ICmp, - Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT - : ICmpInst::ICMP_ULT, - IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) - : X, - ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); + // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1 + // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1 + // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1 + // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1 + return CreateRangeCheck(); } } else if (IsSExt ? C->isAllOnes() : C->isOne()) { if (Pred2 == ICmpInst::ICMP_NE) { @@ -6418,17 +6419,11 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { return replaceInstUsesWith( I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); } else if (!IsSExt || HasOneUse) { - // icmp eq X, (zext (icmp eq X, 1)) --> icmp ult X, 2 - // icmp ne X, (zext (icmp eq X, 1)) --> icmp ugt X, 1 - // icmp eq X, (sext (icmp eq X, -1)) --> icmp ult (X + 1), 2 - // icmp ne X, (sext (icmp eq X, -1)) --> icmp ugt (X + 1), 1 - return ICmpInst::Create( - Instruction::ICmp, - Pred1 == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGT - : ICmpInst::ICMP_ULT, - IsSExt ? Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1)) - : X, - ConstantInt::get(X->getType(), Pred1 == ICmpInst::ICMP_NE ? 1 : 2)); + // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1 + // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1 + // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1 + // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1 + return CreateRangeCheck(); } } else { // when C != 0 && C != 1: >From 55d52b1f05004abe6c4187dc07437580c7f5aa73 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 24 Sep 2023 21:49:47 +0800 Subject: [PATCH 08/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7c7ab62c64dac89..9f2d0c8110f79e0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6391,11 +6391,13 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); auto CreateRangeCheck = [&] { - Value *V1 = Constant::getNullValue(X->getType()); - Value *V2 = ConstantInt::get(X->getType(), IsSExt ? -1 : 1); + Value *CmpV1 = + Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); + Value *CmpV2 = Builder.CreateICmp( + Pred1, X, ConstantInt::get(X->getType(), IsSExt ?
-1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, - Builder.CreateICmp(Pred1, X, V1), Builder.CreateICmp(Pred1, X, V2)); + CmpV1, CmpV2); }; if (C->isZero()) { if (Pred2 == ICmpInst::ICMP_EQ) { >From c0d8f8193fa1620db1f84379f2316fcf4b401e4c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 1 Oct 2023 20:10:57 +0800 Subject: [PATCH 09/10] fixup! [InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` --- .../InstCombine/InstCombineCompares.cpp | 10 ++++---- .../Transforms/InstCombine/and-or-icmps.ll | 17 ++++++++++---- .../test/Transforms/InstCombine/icmp-range.ll | 23 +++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f2d0c8110f79e0..4fca8859dea7acc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6387,14 +6387,15 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { if (match(&I, m_c_ICmp(Pred1, m_Value(X), m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), - m_APInt(C))))))) { + m_APInt(C)))))) && + ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { bool IsSExt = ExtI->getOpcode() == Instruction::SExt; bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); auto CreateRangeCheck = [&] { Value *CmpV1 = Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); Value *CmpV2 = Builder.CreateICmp( - Pred1, X, ConstantInt::get(X->getType(), IsSExt ? -1 : 1)); + Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1)); return BinaryOperator::Create( Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, CmpV1, CmpV2); @@ -6440,8 +6441,9 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 return ICmpInst::Create( Instruction::ICmp, Pred1, X, - ConstantInt::get(X->getType(), - Pred2 == ICmpInst::ICMP_NE ? (IsSExt ? -1 : 1) : 0)); + ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE + ? (IsSExt ? 
-1 : 1) + : 0)); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 2c887d574d397f6..065dbf261e131bf 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -366,10 +366,19 @@ define void @simplify_before_foldAndOfICmps(ptr %p) { ; CHECK-LABEL: @simplify_before_foldAndOfICmps( ; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2 -; CHECK-NEXT: [[C18:%.*]] = icmp slt i16 [[L7]], 1 -; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64 -; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1 +; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16 +; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]] +; CHECK-NEXT: [[C5:%.*]] = icmp slt i16 [[L7]], 1 +; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0 +; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C7]], [[C10]] +; CHECK-NEXT: [[C6:%.*]] = xor i1 [[B15]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C5]], [[C6]] +; CHECK-NEXT: [[C3:%.*]] = and i1 [[TMP2]], [[C10]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C10]], true +; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[C3]] to i64 +; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP4]] ; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1 ; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8 diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 3a40755384f2a21..79790b7458d4219 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1403,6 +1403,29 @@ define <2 x i1> @icmp_ne_sext_eq_otherwise_vec(<2 x i32> %a) { ret <2 x i1> %cmp1 } +define i1 @icmp_ne_sext_ne_zero_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_zero_i128( +; CHECK-NEXT: [[TMP1:%.*]] = add i128 [[A:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i128 [[TMP1]], -2 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 0 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { +; CHECK-LABEL: @icmp_ne_sext_ne_otherwise_i128( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i128 [[A:%.*]], -1 +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i128 %a, 2 + %conv = sext i1 %cmp to i128 + %cmp1 = icmp ne i128 %conv, %a + ret i1 %cmp1 +} + !0 = !{i32 1, i32 6} !1 = !{i32 0, i32 6} !2 = !{i8 0, i8 1} >From 2285a2c6b8e66cf35aed6151c61e841f1349817f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 6 Oct 2023 17:17:53 +0800 Subject: [PATCH 10/10] fixup! 
[InstCombine] Simplify the pattern `a ne/eq (zext (a ne/eq c))` Add negative tests --- .../test/Transforms/InstCombine/icmp-range.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 17903be937057ab..7af06e03fd4b2a9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1426,6 +1426,85 @@ define i1 @icmp_ne_sext_ne_otherwise_i128(i128 %a) { ret i1 %cmp1 } +; Negative tests with non-equality predicates +define i1 @icmp_ne_sext_sgt_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_sgt_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp sgt i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_zero_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_zero_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 0 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_allones_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_allones_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], -1 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, -1 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_ne_sext_slt_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_ne_sext_slt_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp slt i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp ne i32 %conv, %a + ret i1 %cmp1 +} + +define i1 @icmp_slt_sext_ne_otherwise_nofold(i32 %a) { +; CHECK-LABEL: @icmp_slt_sext_ne_otherwise_nofold( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[CONV]], [[A]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %cmp = icmp ne i32 %a, 2 + %conv = sext i1 %cmp to i32 + %cmp1 = icmp slt i32 %conv, %a + ret i1 %cmp1 +} + ; tests from PR59555 define i1 @isFloat(i64 %0) { ; CHECK-LABEL: @isFloat( From lldb-commits at lists.llvm.org Fri Oct 6 04:30:54 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Fri, 06 Oct 2023 04:30:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651fefee.170a0220.2de9d.b127@mx.google.com> dtcxzyw wrote: The conflict resolution should be 
correct now. https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Fri Oct 6 04:50:00 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 04:50:00 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <651ff468.a70a0220.dc6cc.332f@mx.google.com> https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/68408 >From 0692935303b47e3c3f51c55df5ec5d7aac71a6eb Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 6 Oct 2023 12:00:28 +0100 Subject: [PATCH 1/3] [lldb][test] TestCppUnionStaticMembers.py: split out assertions that fail on some platforms Split out the assertions that fail on Windows in preparation to XFAILing them. Drive-by change: * Add a missing `self.build()` call in `test_union_in_anon_namespace` * Fix formatting --- .../TestCppUnionStaticMembers.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py index 47166636b12647c..026e707a8ad2bad 100644 --- a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -8,14 +8,14 @@ import lldbsuite.test.lldbutil as lldbutil class CppUnionStaticMembersTestCase(TestBase): - def test(self): + def test_print_union(self): """Tests that frame variable and expr work - for union static data members""" + for union with static data members""" self.build() (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( self, "return 0", lldb.SBFileSpec("main.cpp") - ) + ) self.expect("frame variable foo", substrs=["val = 42"]) self.expect("frame variable bar", substrs=["val = 137"]) @@ -27,6 +27,15 @@ def test(self): name="val", value="137" )]) + def test_expr_union_static_members(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( name="val", value="42" @@ -37,6 +46,12 @@ def test_union_in_anon_namespace(self): """Tests that frame variable and expr work for union static data members in anonymous namespaces""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( name="val", value="137" >From 3290709743194fdf475121f9e715324dbc56d2fd Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 6 Oct 2023 12:03:28 +0100 Subject: [PATCH 2/3] [lldb][lldbsuite] Add expectedFailureWindows decorator --- lldb/packages/Python/lldbsuite/test/decorators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 6194862f8fe60f9..b8fea1e02e864de 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -511,6 +511,10 @@ def 
expectedFailureNetBSD(bugnumber=None): return expectedFailureOS(["netbsd"], bugnumber) +def expectedFailureWindows(bugnumber=None): + return expectedFailureOS(["windows"], bugnumber) + + # TODO: This decorator does not do anything. Remove it. def expectedFlakey(expected_fn, bugnumber=None): def expectedFailure_impl(func): >From 1c360990beecc5c784011e2f8327a6be8ddc132f Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 6 Oct 2023 12:04:18 +0100 Subject: [PATCH 3/3] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows These tests never worked since their introduction in https://github.com/llvm/llvm-project/pull/68300 --- .../cpp/union-static-data-members/TestCppUnionStaticMembers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py index 026e707a8ad2bad..1988e997499b222 100644 --- a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -27,6 +27,7 @@ def test_print_union(self): name="val", value="137" )]) + @expectedFailureWindows def test_expr_union_static_members(self): """Tests that frame variable and expr work for union static data members""" From lldb-commits at lists.llvm.org Fri Oct 6 05:18:33 2023 From: lldb-commits at lists.llvm.org (Nikita Popov via lldb-commits) Date: Fri, 06 Oct 2023 05:18:33 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <651ffb19.170a0220.cba6e.b3f4@mx.google.com> https://github.com/nikic approved this pull request. Yeah, it looks correct now. https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Fri Oct 6 05:53:40 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Fri, 06 Oct 2023 05:53:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Canonicalize `(X +/- Y) & Y` into `~X & Y` when Y is a power of 2 (PR #67915) In-Reply-To: Message-ID: <65200354.170a0220.40d4a.ba37@mx.google.com> https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/67915 >From c4ce28c942c172e5646b5922f0b02b4169197840 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 1 Oct 2023 21:52:47 +0800 Subject: [PATCH 1/2] [InstCombine] Canonicalize `(X +/- Y) & Y` into `~X & Y` when Y is a power of 2 --- .../InstCombine/InstCombineAndOrXor.cpp | 8 ++++ llvm/test/Transforms/InstCombine/and.ll | 44 +++++++++++++++++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index cbdab3e9c5fb91d..4322cc96f5a2b6c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2250,6 +2250,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y); } + // Canonicalize: + // (X +/- Y) & Y --> ~X & Y when Y is a power of 2. 
+ if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr( + m_c_Add(m_Value(X), m_Deferred(Y)), + m_Sub(m_Value(X), m_Deferred(Y)))))) && + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, /*Depth*/ 0, &I)) + return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y); + const APInt *C; if (match(Op1, m_APInt(C))) { const APInt *XorC; diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index 90f027010e2aea6..eb39ff9014ff3a4 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -1595,8 +1595,8 @@ define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) { define <2 x i8> @flip_masked_bit_undef(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_undef( -; CHECK-NEXT: [[B:%.*]] = add <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[B]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[C]] ; %B = add <2 x i8> %A, @@ -1606,8 +1606,8 @@ define <2 x i8> @flip_masked_bit_undef(<2 x i8> %A) { define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_nonuniform( -; CHECK-NEXT: [[B:%.*]] = add <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[B]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[C]] ; %B = add <2 x i8> %A, @@ -2546,3 +2546,39 @@ define i32 @and_zext_eq_zero(i32 %A, i32 %C) { %5 = and i32 %2, %4 ret i32 %5 } + +define i32 @canonicalize_and_add_power2_or_zero(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_add_power2_or_zero( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[P2]], [[TMP1]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = add i32 %x, %p2 + %and = and i32 %val, %p2 + ret i32 %and +} + +define i32 @canonicalize_and_sub_power2_or_zero(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_sub_power2_or_zero( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[P2]], [[TMP1]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = sub i32 %x, %p2 + %and = and i32 %val, %p2 + ret i32 %and +} >From b64ca5b5f743e6a935f4ea09154c3a08c6e65c47 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 6 Oct 2023 17:32:46 +0800 Subject: [PATCH 2/2] fixup! [InstCombine] Canonicalize `(X +/- Y) & Y` into `~X & Y` when Y is a power of 2 Add additional tests. 
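As a quick sanity check of the fold being tested here, outside of LLVM and not part of the patch itself: the identity (X +/- Y) & Y == ~X & Y for a power-of-two-or-zero Y can be verified by brute force over a small bit width. A standalone Python sketch, assuming two's complement wrap-around as in LLVM IR:

    # Brute-force check of (X +/- Y) & Y == ~X & Y when Y is a power of two
    # or zero, over all 8-bit values with two's complement wrap-around.
    BITS = 8
    MASK = (1 << BITS) - 1
    for y in [0] + [1 << i for i in range(BITS)]:
        for x in range(1 << BITS):
            rhs = (~x & MASK) & y  # ~X & Y in 8 bits
            assert ((x + y) & MASK) & y == rhs
            assert ((x - y) & MASK) & y == rhs
    print("identity holds for all 8-bit values")

The same exhaustive approach covers the commuted add forms exercised by the tests that follow, since addition is commutative; only the commuted sub form genuinely differs, as the nofold test below shows.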
--- llvm/test/Transforms/InstCombine/and.ll | 123 ++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index eb39ff9014ff3a4..989640ed41f2d2e 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -2582,3 +2582,126 @@ define i32 @canonicalize_and_sub_power2_or_zero(i32 %x, i32 %y) { %and = and i32 %val, %p2 ret i32 %and } + +define i32 @canonicalize_and_add_power2_or_zero_commuted1(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_add_power2_or_zero_commuted1( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[P2]], [[TMP1]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = add i32 %p2, %x + %and = and i32 %val, %p2 + ret i32 %and +} + +define i32 @canonicalize_and_add_power2_or_zero_commuted2(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_add_power2_or_zero_commuted2( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[P2]], [[TMP1]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = add i32 %x, %p2 + %and = and i32 %p2, %val + ret i32 %and +} + +define i32 @canonicalize_and_add_power2_or_zero_commuted3(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_add_power2_or_zero_commuted3( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[P2]], [[TMP1]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = add i32 %p2, %x + %and = and i32 %p2, %val + ret i32 %and +} + +define i32 @canonicalize_and_sub_power2_or_zero_commuted_nofold(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_sub_power2_or_zero_commuted_nofold( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[VAL:%.*]] = sub i32 [[P2]], [[X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[VAL]], [[P2]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = sub i32 %p2, %x + %and = and i32 %val, %p2 + ret i32 %and +} + +define i32 @canonicalize_and_add_non_power2_or_zero_nofold(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_add_non_power2_or_zero_nofold( +; CHECK-NEXT: [[VAL:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[VAL]], [[Y]] +; CHECK-NEXT: ret i32 [[AND]] +; + %val = add i32 %x, %y + %and = and i32 %val, %y + ret i32 %and +} + +define i32 @canonicalize_and_add_power2_or_zero_multiuse_nofold(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_add_power2_or_zero_multiuse_nofold( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[VAL:%.*]] = add i32 [[P2]], [[X:%.*]] +; CHECK-NEXT: call void 
@use32(i32 [[VAL]]) +; CHECK-NEXT: [[AND:%.*]] = and i32 [[VAL]], [[P2]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = add i32 %x, %p2 + call void @use32(i32 %val) + %and = and i32 %val, %p2 + ret i32 %and +} + +define i32 @canonicalize_and_sub_power2_or_zero_multiuse_nofold(i32 %x, i32 %y) { +; CHECK-LABEL: @canonicalize_and_sub_power2_or_zero_multiuse_nofold( +; CHECK-NEXT: [[NY:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[P2:%.*]] = and i32 [[NY]], [[Y]] +; CHECK-NEXT: call void @use32(i32 [[P2]]) +; CHECK-NEXT: [[VAL:%.*]] = sub i32 [[X:%.*]], [[P2]] +; CHECK-NEXT: call void @use32(i32 [[VAL]]) +; CHECK-NEXT: [[AND:%.*]] = and i32 [[VAL]], [[P2]] +; CHECK-NEXT: ret i32 [[AND]] +; + %ny = sub i32 0, %y + %p2 = and i32 %y, %ny + call void @use32(i32 %p2) ; keep p2 + + %val = sub i32 %x, %p2 + call void @use32(i32 %val) + %and = and i32 %val, %p2 + ret i32 %and +} From lldb-commits at lists.llvm.org Fri Oct 6 05:57:20 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 05:57:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) In-Reply-To: Message-ID: <65200430.630a0220.71861.0e50@mx.google.com> https://github.com/DavidSpickett edited https://github.com/llvm/llvm-project/pull/68094 From lldb-commits at lists.llvm.org Fri Oct 6 05:58:21 2023 From: lldb-commits at lists.llvm.org (Yingwei Zheng via lldb-commits) Date: Fri, 06 Oct 2023 05:58:21 -0700 (PDT) Subject: [Lldb-commits] [lldb] [InstCombine] Simplify the pattern `a ne/eq (zext/sext (a ne/eq c))` (PR #65852) In-Reply-To: Message-ID: <6520046d.a70a0220.80e9a.38d4@mx.google.com> https://github.com/dtcxzyw closed https://github.com/llvm/llvm-project/pull/65852 From lldb-commits at lists.llvm.org Fri Oct 6 06:00:54 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:00:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add isAArch64SMEFA64 check to SME testing (PR #68094) In-Reply-To: Message-ID: <65200506.170a0220.8b238.ba86@mx.google.com> DavidSpickett wrote: I ran all the tests that originally had `isAArch64SME` in, on an FVP without smefa64 and added checks as needed. But I did not make note of the unchanged ones at the time, so I've redone that and added it to the commit message. The ffr use you specifically noticed, the tests use isAArch64SMEFA64 up front and skip if it's not there. Then later when they want an SVCR value they just use isAArch64SME since strictly that's all that bit needs to know. We could just have isAArch64SMEFA64 everywhere even if we didn't really need smefa64, but I figured it would be nice to have as many tests run in either situation as we can (for my benefit more than anything else right now). https://github.com/llvm/llvm-project/pull/68094 From lldb-commits at lists.llvm.org Fri Oct 6 06:09:17 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:09:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Invalidate SVG prior to reconfiguring ZA regdef (PR #66768) In-Reply-To: Message-ID: <652006fd.170a0220.bc54.b5ac@mx.google.com> ================ @@ -783,6 +783,11 @@ void GDBRemoteRegisterContext::AArch64Reconfigure() { std::optional svg_reg_value; const RegisterInfo *svg_reg_info = m_reg_info_sp->GetRegisterInfo("svg"); if (svg_reg_info) { + // When vg is written it is automatically made invalid. 
Writing vg will also + // change svg if we're in streaming mode but it will not be made invalid + // so do this manually so the following read gets the latest svg value. ---------------- DavidSpickett wrote: You're right there's `uint32_t *invalidate_regs;` in the register info, I didn't think to look at it. The problem with that is it needs to know up front what the register number is. So we could make writes to svg invalidate vg since we know what vg's register number will be, but this is not needed since svg is read only. Making vg invalidate svg is harder because we don't know what number it will be, and it might not even exist. Though, I could patch it in once we know we're adding the SME register set. Since after that we don't re-order registers, only change the size of some. I'll give that a go. If patching it in works, it would lead to some redundant reads if you write vg during non-streaming mode, but it would be no worse than what I've got here I think. https://github.com/llvm/llvm-project/pull/66768 From lldb-commits at lists.llvm.org Fri Oct 6 06:31:01 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:31:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200c15.170a0220.88a10.bd29@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... 
+ +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore +``svg`` and ``vg`` show the same value. + +Changing the Streaming Vector Length +.................................... + +To reduce complexity for LLDB, ``svg`` is read only. 
This means that you can +only change the streaming vector length using LLDB when the debugee is in +streaming mode. + +As for non-streaming SVE, doing so will essentially make the content of the SVE ---------------- DavidSpickett wrote: There is a configuration where you only have SVE registers while in streaming mode. It's legal and has kernel support, but I have not tested it yet. I expect it will mostly be fixing up tests that break, the ptrace use should be the same for cores like this. ZA only, I doubt it because one of the main ways to get data in and out of ZA is vector registers. We'd have to add some instructions that copied to pairs of Neon registers otherwise. Never say never of course but for now the streaming mode only SVE config is the one I know of. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:36:41 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:36:41 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200d69.170a0220.682d4.beb0@mx.google.com> https://github.com/DavidSpickett updated https://github.com/llvm/llvm-project/pull/66767 >From 705251d8ecbed546f46a5929a298ad88c70cc330 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 22 Aug 2023 11:12:23 +0100 Subject: [PATCH 1/2] [lldb][AArch64] Add release notes and documentation for SME This adds a release note for all the SME support now in LLDB and a page where I have documented the user experience (for want of a better term) when using SVE and SME. This includes things like which mode transitions can or cannot be triggered from within LLDB. I hope this will serve to A: document what I've implemented and B: be a user's guide to these extensions. (though it is not a design document, read the commits and code for that sort of detail) --- lldb/docs/index.rst | 1 + lldb/docs/use/aarch64-linux.rst | 190 ++++++++++++++++++++++++++++++++ llvm/docs/ReleaseNotes.rst | 4 + 3 files changed, 195 insertions(+) create mode 100644 lldb/docs/use/aarch64-linux.rst diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst index 2eb57cefbd883ea..2fff25b27b974ea 100644 --- a/lldb/docs/index.rst +++ b/lldb/docs/index.rst @@ -125,6 +125,7 @@ interesting areas to contribute to lldb. use/qemu-testing use/intel_pt use/ondemand + use/aarch64-linux use/troubleshooting use/links Man Page diff --git a/lldb/docs/use/aarch64-linux.rst b/lldb/docs/use/aarch64-linux.rst new file mode 100644 index 000000000000000..850a5e5a5837cda --- /dev/null +++ b/lldb/docs/use/aarch64-linux.rst @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. 
+* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. 
+ Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore +``svg`` and ``vg`` show the same value. + +Changing the Streaming Vector Length +.................................... + +To reduce complexity for LLDB, ``svg`` is read only. This means that you can +only change the streaming vector length using LLDB when the debugee is in +streaming mode. + +As for non-streaming SVE, doing so will essentially make the content of the SVE +registers undefined. It will also disable ZA, which follows what the Linux +Kernel does. + +Inactive ZA Handling +.................... + +LLDB does not handle registers that can come and go at runtime (SVE changes +size but it does not dissappear). Therefore when ``za`` is not enabled, LLDB +will return a block of 0s instead. This block will match the expected size of +``za``:: + + (lldb) register read za svg svcr + za = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 <...> } + svg = 0x0000000000000002 + svcr = 0x0000000000000001 + +Note that ``svcr`` bit 2 is not set, meaning ``za`` is inactive. + +If you were to write to ``za`` from LLDB, ``za`` will be made active. There is +no way from within LLDB to reverse this change. As for changing the vector +length, the debugee could still do something that would disable ``za`` again. + +If you want to know whether ``za`` is active or not, refer to bit 2 of the +``svcr`` register. + +ZA Register Presentation +........................ + +As for SVE, LLDB does not know how you will use ``za``. At any given time an +instruction could use any number of subsets of it. Therefore LLDB will show +``za`` as one large vector of individual bytes. + +Expression evaluation +..................... + +The mode (streaming or non-streaming), streaming vector length and ZA state will +be restored after expression evaluation. On top of all the things saved for SVE +in general. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 660bb4e70a5a707..d7bca9540ec6a08 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -186,6 +186,10 @@ Changes to LLDB * Methods in SBHostOS related to threads have had their implementations removed. These methods will return a value indicating failure. +* LLDB now supports debugging the Scalable Matrix Extension (SME) on AArch64 + Linux for both running processes and core files. For details refer to the + `AArch64 Linux documentation `_. 
+ Changes to Sanitizers --------------------- * HWASan now defaults to detecting use-after-scope bugs. >From dbc6963addddd8150d7c158a82b355e6282367c8 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 6 Oct 2023 14:36:21 +0100 Subject: [PATCH 2/2] Address comments --- lldb/docs/use/aarch64-linux.rst | 74 ++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/lldb/docs/use/aarch64-linux.rst b/lldb/docs/use/aarch64-linux.rst index 850a5e5a5837cda..707087a9bd72ea2 100644 --- a/lldb/docs/use/aarch64-linux.rst +++ b/lldb/docs/use/aarch64-linux.rst @@ -35,30 +35,32 @@ In LLDB you will be able to see the following new registers: The example above has a vector length of 16 bytes. Within LLDB you will always see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). -Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) -elsewhere. Where you see "vl", it is in bytes. - -Changing the Vector Length -.......................... +Elsewhere in kernel code or applications, you may see "vq" which is the vector +length in quadwords (16 bytes). Where you see "vl", it is in bytes. While you can count the size of a P or Z register, it is intended that ``vg`` be used to find the current vector length. -vg can be written. Writing the current vector length changes nothing. If you -increase the vector length, the registers will likely be reset to 0. If you -decrease it, LLDB will truncate the Z registers but everything else will be reset -to 0. +Changing the Vector Length +.......................... + +The ``vg`` register can be written during a debug session. Writing the current +vector length changes nothing. If you increase the vector length, the registers +will likely be reset to 0. If you decrease it, LLDB will truncate the Z +registers but everything else will be reset to 0. -Generally you should not assume that SVE state after changing the vector length -is in any way the same as it was previously. If you need to do it, do it before -a function's first use of SVE. +You should not assume that SVE state after changing the vector length is in any +way the same as it was previously. Whether that is done from within the +debuggee, or by LLDB. If you need to change the vector length, do so before a +function's first use of SVE. Z Register Presentation ....................... -LLDB makes no attempt to predict how an SVE Z register will be used. Even if the -next SVE instruction (which may some distance away) would use, for example, 32 -bit elements, LLDB prints ``z0`` as single bytes. +LLDB makes no attempt to predict how SVE Z registers will be used. Since LLDB +does not know what sort of elements future instructions will interpret the +register as. It therefore does not change the visualisation of the register +and always defaults to showing a vector of byte sized elements. If you know what format you are going to use, give a format option:: @@ -94,7 +96,8 @@ See `here `__ for the Linux Kernel's handling of it. -SME adds a "Streaming Mode" to SVE. This mode has its own vector length. +SME adds a "Streaming Mode" to SVE, and this mode has its own vector length +known as the "Streaming Vector Length". In LLDB you will see the following new registers: @@ -107,16 +110,27 @@ In LLDB you will see the following new registers: * ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix Extension". This is a square made up of rows of length equal to the streaming vector length (svl). 
Meaning that the total size is svl * svl. -* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. - Except that where ``vg`` shows the length for the active mode, ``svg`` will - always show the streaming vector length, even in non-streaming mode. This - register is read only. * ``svcr`` the Streaming Vector Control Register. This is actually a pseduo register but it matches the content of the architecturaly defined ``SVCR``. This is the register you should use to check whether streaming mode and/or ``za`` is active. This register is read only. +* ``svg`` the streaming vector length in granules. This value is not connected + to the vector length of non-streaming mode and may change independently. This + register is read only. + +.. note:: + While in non-streaming mode, the ``vg`` register shows the non-streaming + vector length, and the ``svg`` register shows the streaming vector length. + When in streaming mode, both ``vg`` and ``svg`` show the streaming mode vector + length. Therefore it is not possible at this time to read the non-streaming + vector length within LLDB, while in streaming mode. This is a limitation of + the LLDB implementation not the architecture, which stores both lengths + independently. -In the example below, the streaming vector length is 16 bytes:: +In the example below, the streaming vector length is 16 bytes and we are in +streaming mode. Note that bits 0 and 1 of ``svcr`` are set, indicating that we +are in streaming mode and ZA is active. ``vg`` and ``svg`` report the same value +as ``vg`` is showing the streaming mode vector length:: Scalable Vector Extension Registers: vg = 0x0000000000000002 @@ -139,9 +153,6 @@ In the example below, the streaming vector length is 16 bytes:: svg = 0x0000000000000002 svcr = 0x0000000000000003 -Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore -``svg`` and ``vg`` show the same value. - Changing the Streaming Vector Length .................................... @@ -153,8 +164,8 @@ As for non-streaming SVE, doing so will essentially make the content of the SVE registers undefined. It will also disable ZA, which follows what the Linux Kernel does. -Inactive ZA Handling -.................... +Visibility of an Inactive ZA Register +..................................... LLDB does not handle registers that can come and go at runtime (SVE changes size but it does not dissappear). Therefore when ``za`` is not enabled, LLDB @@ -173,14 +184,17 @@ no way from within LLDB to reverse this change. As for changing the vector length, the debugee could still do something that would disable ``za`` again. If you want to know whether ``za`` is active or not, refer to bit 2 of the -``svcr`` register. +``svcr`` register, otherwise known as ``SVCR.ZA``. ZA Register Presentation ........................ -As for SVE, LLDB does not know how you will use ``za``. At any given time an -instruction could use any number of subsets of it. Therefore LLDB will show -``za`` as one large vector of individual bytes. +As for SVE, LLDB does not know how the debugee will use ``za``, and therefore +does not know how it would be best to display it. At any time any given +instrucion could interpret its contents as many kinds and sizes of data. + +So LLDB will default to showing ``za`` as one large vector of individual bytes. +You can override this with a format option (see the SVE example above). Expression evaluation ..................... 
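To make the svcr/svg description in the patches above concrete, here is a small sketch using LLDB's Python scripting API. It is an illustration rather than part of the patch, and assumes a live AArch64 Linux session with SME support, stopped in a frame, with the register names as documented above:

    # Decode SME state from the svcr and svg pseudo registers.
    # Run inside LLDB's "script" command while the process is stopped.
    import lldb

    def describe_sme(frame):
        svcr = frame.FindRegister("svcr").GetValueAsUnsigned()
        svg = frame.FindRegister("svg").GetValueAsUnsigned()
        streaming = bool(svcr & 1)  # SVCR.SM, the lowest bit
        za_active = bool(svcr & 2)  # SVCR.ZA, the next bit
        svl = svg * 8               # granules are 8 bytes each
        print(f"streaming mode: {streaming}, ZA active: {za_active}")
        print(f"streaming vector length: {svl} bytes, "
              f"ZA is {svl * svl} bytes when active")

    describe_sme(lldb.frame)

With the example values from the documentation above (svcr = 0x3, svg = 2), this would report streaming mode and ZA active, a 16 byte streaming vector length, and a 256 byte ZA.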
From lldb-commits at lists.llvm.org Fri Oct 6 06:37:49 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:37:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200dad.170a0220.9362f.ba91@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length ---------------- DavidSpickett wrote: I've clarified that users should not trust the content after a length change, from either lldb or the debugee. Same rule applies. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:39:14 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:39:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200e02.170a0220.8e9f2.a919@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. 
+ +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the ---------------- DavidSpickett wrote: I've expanded this because the intent was to start with how the register is used, and then note how that impacts how it's shown. So that folks realise the connection, or rather lack of, which prevents us from being more clever about this. So I've added what you suggest, just as a later sentence. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:40:35 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:40:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200e53.170a0220.d59a.c4b0@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. 
code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. 
This + register is read only. ---------------- DavidSpickett wrote: I've noted explicitly in a few more places that the streaming and non-streaming vector length are not tied together. Then I've moved the comment about the contents of svg and vg into its own note after the bullet points. So that it can be properly explained. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:41:13 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:41:13 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200e79.170a0220.23263.bce2@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. 
In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore ---------------- DavidSpickett wrote: I've moved this up to before the example. So I explain what you're going to see, then show the output. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:42:20 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:42:20 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200ebc.620a0220.a90cc.3d7a@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. 
If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. + +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one has size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length. Making each one vector length / 8 sized. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq" which is the vector length in quadwords (16 bytes) +elsewhere. Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +vg can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after the expression has been evaluated. Including the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. 
+ +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers. These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl). Meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE. + Except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseduo + register but it matches the content of the architecturaly defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set meaning we are in streaming mode. Therefore +``svg`` and ``vg`` show the same value. + +Changing the Streaming Vector Length +.................................... + +To reduce complexity for LLDB, ``svg`` is read only. This means that you can +only change the streaming vector length using LLDB when the debugee is in +streaming mode. + +As for non-streaming SVE, doing so will essentially make the content of the SVE +registers undefined. It will also disable ZA, which follows what the Linux +Kernel does. + +Inactive ZA Handling ---------------- DavidSpickett wrote: I went with "Visibility of an Inactive ZA Register" since, for now, this design choice is limited to ZA. Though you are right that if anyone else had this problem, they'd likely solve it how I did, making it a general pattern. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:42:50 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 06:42:50 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Add release notes and documentation for SME (PR #66767) In-Reply-To: Message-ID: <65200eda.620a0220.39bb2.3d77@mx.google.com> ================ @@ -0,0 +1,190 @@ +Using LLDB On AArch64 Linux +=========================== + +This page explains the details of debugging certain AArch64 extensions using +LLDB. If something is not mentioned here, it likely works as you would expect. + +This is not a replacement for ptrace and Linux Kernel documentation. This covers +how LLDB has chosen to use those things and how that effects your experience as +a user. 
+ +Scalable Vector Extension (SVE) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +In LLDB you will be able to see the following new registers: + +* ``z0-z31`` vector registers, each one with a size equal to the vector length. +* ``p0-p15`` predicate registers, each one containing 1 bit per byte in the vector + length, making each one vector length / 8 bytes in size. +* ``ffr`` the first fault register, same size as a predicate register. +* ``vg``, the vector length in "granules". Each granule is 8 bytes. + +.. code-block:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + +The example above has a vector length of 16 bytes. Within LLDB you will always +see "vg" as in the ``vg`` register, which is 2 in this case (8*2 = 16). +Elsewhere you may see "vq", which is the vector length in quadwords (16 bytes). +Where you see "vl", it is in bytes. + +Changing the Vector Length +.......................... + +While you can count the size of a P or Z register, it is intended that ``vg`` be +used to find the current vector length. + +``vg`` can be written. Writing the current vector length changes nothing. If you +increase the vector length, the registers will likely be reset to 0. If you +decrease it, LLDB will truncate the Z registers but everything else will be reset +to 0. + +Generally you should not assume that SVE state after changing the vector length +is in any way the same as it was previously. If you need to do it, do it before +a function's first use of SVE. + +Z Register Presentation +....................... + +LLDB makes no attempt to predict how an SVE Z register will be used. Even if the +next SVE instruction (which may be some distance away) would use, for example, 32 +bit elements, LLDB prints ``z0`` as single bytes. + +If you know what format you are going to use, give a format option:: + + (lldb) register read z0 -f uint32_t[] + z0 = {0x01010101 0x01010101 0x01010101 0x01010101} + +FPSIMD and SVE Modes +.................... + +Prior to the debugee's first use of SVE, it is in what the Linux Kernel terms +SIMD mode. Only the FPU is being used. In this state LLDB will still show the +SVE registers, however the values are simply the FPU values zero extended up to +the vector length. + +On first access to SVE, the process goes into SVE mode. Now the Z values are +in the real Z registers. + +You can also trigger this with LLDB by writing to an SVE register. Note that +there is no way to undo this change from within LLDB. However, the debugee +itself could do something to end up back in SIMD mode. + +Expression evaluation +..................... + +If you evaluate an expression, all SVE state is saved prior to, and restored +after, the expression has been evaluated. This includes the register values and +vector length. + +Scalable Matrix Extension (SME) +------------------------------- + +See `here `__ +to learn about the extension and `here `__ +for the Linux Kernel's handling of it. + +SME adds a "Streaming Mode" to SVE. This mode has its own vector length. + +In LLDB you will see the following new registers: + +* ``tpidr2``, an extra per-thread pointer reserved for use by the SME ABI. + This is not scalable, just pointer sized aka 64 bit. +* ``z0-z31`` streaming SVE registers.
These have the same names as the + non-streaming registers and therefore you will only see the active set in + LLDB. You cannot read or write the inactive mode's registers. Their size + is the same as the streaming vector length. +* ``za`` the Array Storage register. The "Matrix" part of "Scalable Matrix + Extension". This is a square made up of rows of length equal to the streaming + vector length (svl), meaning that the total size is svl * svl. +* ``svg`` the vector length in granules. This acts the same as ``vg`` for SVE, + except that where ``vg`` shows the length for the active mode, ``svg`` will + always show the streaming vector length, even in non-streaming mode. This + register is read only. +* ``svcr`` the Streaming Vector Control Register. This is actually a pseudo + register but it matches the content of the architecturally defined ``SVCR``. + This is the register you should use to check whether streaming mode and/or + ``za`` is active. This register is read only. + +In the example below, the streaming vector length is 16 bytes:: + + Scalable Vector Extension Registers: + vg = 0x0000000000000002 + z0 = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + <...> + p0 = {0xff 0xff} + <...> + ffr = {0xff 0xff} + + <...> + + Thread Local Storage Registers: + tpidr = 0x0000fffff7ff4320 + tpidr2 = 0x1122334455667788 + + Scalable Matrix Array Storage Registers: + za = {0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 <...> } + + Scalable Matrix Extension Registers: + svg = 0x0000000000000002 + svcr = 0x0000000000000003 + +Note that ``svcr`` bit 1 is set, meaning we are in streaming mode. Therefore +``svg`` and ``vg`` show the same value. + +Changing the Streaming Vector Length +.................................... + +To reduce complexity for LLDB, ``svg`` is read only. This means that you can +only change the streaming vector length using LLDB when the debugee is in +streaming mode. + +As with non-streaming SVE, doing so will essentially make the content of the SVE +registers undefined. It will also disable ZA, which follows what the Linux +Kernel does. + +Inactive ZA Handling +.................... + +LLDB does not handle registers that can come and go at runtime (SVE changes +size but it does not disappear). Therefore when ``za`` is not enabled, LLDB +will return a block of 0s instead. This block will match the expected size of +``za``:: + + (lldb) register read za svg svcr + za = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 <...> } + svg = 0x0000000000000002 + svcr = 0x0000000000000001 + +Note that ``svcr`` bit 2 is not set, meaning ``za`` is inactive. + +If you write to ``za`` from LLDB, ``za`` will be made active. There is +no way from within LLDB to reverse this change. As with changing the vector +length, the debugee could still do something that would disable ``za`` again. + +If you want to know whether ``za`` is active or not, refer to bit 2 of the +``svcr`` register. + +ZA Register Presentation +........................ + +As with SVE, LLDB does not know how you will use ``za``. At any given time an ---------------- DavidSpickett wrote: Yep, I've expanded it more to explain what use specifically means and why that means we can't predict the right format. https://github.com/llvm/llvm-project/pull/66767 From lldb-commits at lists.llvm.org Fri Oct 6 06:57:43 2023 From: lldb-commits at lists.llvm.org (J. Ryan Stinnett via lldb-commits) Date: Fri, 06 Oct 2023 06:57:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [IPSCCP] Variable not visible at Og.
(PR #66745) In-Reply-To: Message-ID: <65201257.a70a0220.f2a4f.3de5@mx.google.com> https://github.com/jryans edited https://github.com/llvm/llvm-project/pull/66745 From lldb-commits at lists.llvm.org Fri Oct 6 07:28:28 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 07:28:28 -0700 (PDT) Subject: [Lldb-commits] [lldb] 21030b9 - [lldb][Docs] Add section on using QEMU without bridge networking Message-ID: <6520198c.170a0220.d2965.c4a3@mx.google.com> Author: David Spickett Date: 2023-10-06T15:28:10+01:00 New Revision: 21030b9ab4487d845e29792063f5666d8c4b8e09 URL: https://github.com/llvm/llvm-project/commit/21030b9ab4487d845e29792063f5666d8c4b8e09 DIFF: https://github.com/llvm/llvm-project/commit/21030b9ab4487d845e29792063f5666d8c4b8e09.diff LOG: [lldb][Docs] Add section on using QEMU without bridge networking Bridge networking means that you can get to any port on the VM from the host, which is great. However it is quite involved to set up in some cases, and I've certainly messed it up in the past. An alternative is forwarding a block of ports and using some hidden options to lldb-server to limit what it uses. This commit documents that and the pitfall that the port list isn't shared. The theory also works for Arm's FVP (which inspired me to write this up) but since QEMU is the preferred option upstream, it goes in that document. Along the way I fixed a link to the QEMU page that used the URL rather than a relative link to the document. Added: Modified: lldb/docs/resources/test.rst lldb/docs/use/qemu-testing.rst Removed: ################################################################################ diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst index ddade269d5d4aba..3c9e24dde8fd454 100644 --- a/lldb/docs/resources/test.rst +++ b/lldb/docs/resources/test.rst @@ -589,9 +589,9 @@ Running tests in QEMU System Emulation Environment `````````````````````````````````````````````````` QEMU can be used to test LLDB in an emulation environment in the absence of -actual hardware. `QEMU based testing `_ -page describes how to setup an emulation environment using QEMU helper scripts -found under llvm-project/lldb/scripts/lldb-test-qemu. These scripts currently +actual hardware. :doc:`/use/qemu-testing` describes how to set up an +emulation environment using QEMU helper scripts found in +``llvm-project/lldb/scripts/lldb-test-qemu``. These scripts currently work with Arm or AArch64, but support for other architectures can be added easily. Debugging Test Failures diff --git a/lldb/docs/use/qemu-testing.rst b/lldb/docs/use/qemu-testing.rst index 1a4433a991ff711..6e282141864cc1d 100644 --- a/lldb/docs/use/qemu-testing.rst +++ b/lldb/docs/use/qemu-testing.rst @@ -113,6 +113,9 @@ run-qemu.sh has following dependencies: Steps for running lldb-server in QEMU system emulation environment ------------------------------------------------------------------ +Using Bridge Networking +*********************** + * Make sure bridge networking is enabled between host machine and QEMU VM * Find out ip address assigned to eth0 in emulation environment @@ -136,3 +139,39 @@ Steps for running lldb-server in QEMU system emulation environment * Run lldb-server inside QEMU VM * Try connecting to lldb-server running inside QEMU VM with selected ip:port + +Without Bridge Networking +************************* + +Without bridge networking you will have to forward individual ports from the VM +to the host (refer to QEMU's manuals for the specific options).
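+
+As one possible example (the exact flags here are an assumption and depend on
+how you launch QEMU), user mode networking can forward a port like this::
+
+  $ qemu-system-aarch64 <...> -netdev user,id=net0,hostfwd=tcp::54321-:54321 \
+      -device virtio-net-device,netdev=net0
+
+At a minimum, the ports you need are: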
+ +* At least one to connect to the initial ``lldb-server``. +* One more if you want to use ``lldb-server`` in ``platform mode``, and have it + start a ``gdbserver`` instance for you. +* A bunch more if you want to run tests against the ``lldb-server`` platform. + +If you are doing either of the latter two, you should also restrict what ports +``lldb-server`` tries to use, otherwise it will randomly pick one that is almost +certainly not forwarded. An example of this is shown below. + +:: + + $ lldb-server platform --server --listen 0.0.0.0:54321 \ + --min-gdbserver-port 49140 --max-gdbserver-port 49150 + +The result of this is that: + +* ``lldb-server`` platform mode listens externally on port ``54321``. + +* When it is asked to start a new gdbserver mode instance, it will use a port + in the range ``49140`` to ``49150``. + +Your VM configuration should have ports ``54321`` and ``49140`` to ``49150`` +forwarded for this to work. + +.. note:: + These options are used to create a "port map" within ``lldb-server``. + Unfortunately this map is not shared across all the processes it may create, + and across a few uses you may run out of valid ports. To work around this, + restart the platform every so often, especially after running a set of tests. From lldb-commits at lists.llvm.org Fri Oct 6 07:46:57 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Fri, 06 Oct 2023 07:46:57 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <65201de1.170a0220.da2b0.c357@mx.google.com> https://github.com/JDevlieghere approved this pull request. https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 07:50:48 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Fri, 06 Oct 2023 07:50:48 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <65201ec8.050a0220.84a10.45f2@mx.google.com> ================ @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf { ---------------- JDevlieghere wrote: I'd prefer the old way for the sake of consistency. https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Fri Oct 6 07:52:18 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Fri, 06 Oct 2023 07:52:18 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <65201f22.a70a0220.345e2.43ba@mx.google.com> ================ @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace lldb_plugin::dwarf { ---------------- walter-erquinigo wrote: Sure, I'll change that!! https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Fri Oct 6 07:57:02 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Fri, 06 Oct 2023 07:57:02 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <6520203e.170a0220.2619a.b3a9@mx.google.com> JDevlieghere wrote: In the previous iteration of this PR, @bulbazord suggested keeping this in the `lldb_private` namespace. Something like `lldb_private::plugin::dwarf` for example? Is there a reason to make this not part of `lldb_private`?
As Jim pointed out in that thread, the way LLDB plugins work today, you're stuck with the unstable private API anyway, so being explicit about that might be nice? Is the goal to do the same for other plugins as well? Seems like that could be a largely mechanical change. If we enforce the use of the namespace, it might make it more obvious when folks accidentally try to use them in generic code. WDYT? https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Fri Oct 6 08:05:44 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 08:05:44 -0700 (PDT) Subject: [Lldb-commits] [lldb] d579a1a - [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (#68408) Message-ID: <65202248.630a0220.b2734.ad5d@mx.google.com> Author: Michael Buch Date: 2023-10-06T16:05:40+01:00 New Revision: d579a1a24adc26794d9a720cea52f7bb8f37c683 URL: https://github.com/llvm/llvm-project/commit/d579a1a24adc26794d9a720cea52f7bb8f37c683 DIFF: https://github.com/llvm/llvm-project/commit/d579a1a24adc26794d9a720cea52f7bb8f37c683.diff LOG: [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (#68408) Split out the assertions that fail on Windows in preparation to XFAILing them. Drive-by change: * Add a missing `self.build()` call in `test_union_in_anon_namespace` * Fix formatting * Add expectedFailureWindows decorator Added: Modified: lldb/packages/Python/lldbsuite/test/decorators.py lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py Removed: ################################################################################ diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 6194862f8fe60f9..b8fea1e02e864de 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -511,6 +511,10 @@ def expectedFailureNetBSD(bugnumber=None): return expectedFailureOS(["netbsd"], bugnumber) +def expectedFailureWindows(bugnumber=None): + return expectedFailureOS(["windows"], bugnumber) + + # TODO: This decorator does not do anything. Remove it.
def expectedFlakey(expected_fn, bugnumber=None): def expectedFailure_impl(func): diff --git a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py index 47166636b12647c..1988e997499b222 100644 --- a/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py +++ b/lldb/test/API/lang/cpp/union-static-data-members/TestCppUnionStaticMembers.py @@ -8,14 +8,14 @@ import lldbsuite.test.lldbutil as lldbutil class CppUnionStaticMembersTestCase(TestBase): - def test(self): + def test_print_union(self): """Tests that frame variable and expr work - for union static data members""" + for union with static data members""" self.build() (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( self, "return 0", lldb.SBFileSpec("main.cpp") - ) + ) self.expect("frame variable foo", substrs=["val = 42"]) self.expect("frame variable bar", substrs=["val = 137"]) @@ -27,6 +27,16 @@ def test(self): name="val", value="137" )]) + @expectedFailureWindows + def test_expr_union_static_members(self): + """Tests that frame variable and expr work + for union static data members""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Foo::sVal1", result_type="const int", result_value="-42") self.expect_expr("Foo::sVal2", result_type="Foo", result_children=[ValueCheck( name="val", value="42" @@ -37,6 +47,12 @@ def test_union_in_anon_namespace(self): """Tests that frame variable and expr work for union static data members in anonymous namespaces""" + self.build() + + (target, process, main_thread, _) = lldbutil.run_to_source_breakpoint( + self, "return 0", lldb.SBFileSpec("main.cpp") + ) + self.expect_expr("Bar::sVal1", result_type="const int", result_value="-137") self.expect_expr("Bar::sVal2", result_type="Bar", result_children=[ValueCheck( name="val", value="137" From lldb-commits at lists.llvm.org Fri Oct 6 08:05:47 2023 From: lldb-commits at lists.llvm.org (Michael Buch via lldb-commits) Date: Fri, 06 Oct 2023 08:05:47 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb[test] TestCppUnionStaticMembers.py: XFAIL assertions on windows (PR #68408) In-Reply-To: Message-ID: <6520224b.170a0220.84589.c02f@mx.google.com> https://github.com/Michael137 closed https://github.com/llvm/llvm-project/pull/68408 From lldb-commits at lists.llvm.org Fri Oct 6 08:16:45 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Fri, 06 Oct 2023 08:16:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <652024dd.170a0220.ff3c3.c81c@mx.google.com> walter-erquinigo wrote: @JDevlieghere , now that I think of it twice, I like the approach you mention better. I'll use `lldb_private::plugin::dwarf` for the namespace then. Do you still want these nested namespaces to be defined in three lines instead of one? I could keep consistency as you mention, but I don't know if that would be visually too much. 
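For reference, here is a minimal sketch of the two spellings under discussion (illustrative only, not an excerpt from the patch):

  // One line, using a C++17 nested namespace definition:
  namespace lldb_private::plugin::dwarf {
  class DWARFUnit;
  } // namespace lldb_private::plugin::dwarf

  // Three lines, one namespace per level:
  namespace lldb_private {
  namespace plugin {
  namespace dwarf {
  class DWARFUnit;
  } // namespace dwarf
  } // namespace plugin
  } // namespace lldb_private

Both compile to exactly the same thing; the difference is purely visual.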
https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Fri Oct 6 08:20:09 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Fri, 06 Oct 2023 08:20:09 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <652025a9.170a0220.6f054.c9b3@mx.google.com> JDevlieghere wrote: > Do you still want these nested namespaces to be defined in three lines instead of one? If the plan is to move all plugins under `lldb_private::plugin` then the one-line is fine as I expect it will be the (new) majority of nested namespaces in LLDB. https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Fri Oct 6 08:24:05 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Fri, 06 Oct 2023 08:24:05 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <65202695.170a0220.d5830.c6fa@mx.google.com> walter-erquinigo wrote: sounds good! I'll do the change now https://github.com/llvm/llvm-project/pull/68150 From lldb-commits at lists.llvm.org Fri Oct 6 08:30:39 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 08:30:39 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Invalidate SVG prior to reconfiguring ZA regdef (PR #66768) In-Reply-To: Message-ID: <6520281f.630a0220.f370d.3b8c@mx.google.com> https://github.com/DavidSpickett updated https://github.com/llvm/llvm-project/pull/66768 >From 8b292d573710e1c37227d6f80b9770220abd658e Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 22 Aug 2023 14:42:35 +0100 Subject: [PATCH 1/2] [lldb][AArch64] Invalidate SVG prior to reconfiguring ZA regdef This fixes a bug where writing vg during streaming mode could prevent you reading za directly afterwards. vg is invalidated just prior to us reading it in AArch64Reconfigure, but svg was not. This led to some situations where vg would be updated or read fresh, but svg would not be. This meant it had some undefined value which led to errors that prevented us reading ZA. Likely we received a lot more data than we were expecting. To fix this, invalidate svg before reconfiguring. This ensures that the value used is the latest one from the remote and matches the procedure for SVE's VG. The bug may depend on timing; I could not find a consistent way to trigger it. I originally found it when checking whether za is disabled after a vg change, so I've added checks for that to TestZAThreadedDynamic. The SVE VG version of the bug did show up on the buildbot, but not consistently. So it's possible that TestZAThreadedDynamic does in fact cover this, but I haven't run it enough times to know.
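For illustration, a minimal session that exercises this path might look like the following (assuming a debugee already in streaming mode with za active; output is omitted since it varied while the bug was present):

  (lldb) register read svcr
  (lldb) register write vg 2
  (lldb) register read za

Before this patch the final read could fail, because the stale svg value meant LLDB sized za incorrectly. With svg invalidated, the read returns a za of the size implied by the new streaming vector length.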
--- .../Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp | 5 +++++ .../za_dynamic_resize/TestZAThreadedDynamic.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp index b127d3d6213a4aa..72280927471f883 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp @@ -783,6 +783,11 @@ void GDBRemoteRegisterContext::AArch64Reconfigure() { std::optional svg_reg_value; const RegisterInfo *svg_reg_info = m_reg_info_sp->GetRegisterInfo("svg"); if (svg_reg_info) { + // When vg is written it is automatically made invalid. Writing vg will also + // change svg if we're in streaming mode but it will not be made invalid + // so do this manually so the following read gets the latest svg value. + SetRegisterIsValid(svg_reg_info, false); + uint32_t svg_reg_num = svg_reg_info->kinds[eRegisterKindLLDB]; uint64_t reg_value = ReadRegisterAsUnsigned(svg_reg_num, fail_value); if (reg_value != fail_value && reg_value <= 32) diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py index 65d1071c26b2a34..d2a26ce71bde1d8 100644 --- a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py +++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py @@ -125,11 +125,13 @@ def za_test_impl(self, enable_za): self.runCmd("thread select %d" % (idx + 1)) self.check_za_register(4, 2) self.runCmd("register write vg 2") + self.check_disabled_za_register(2) elif stopped_at_line_number == thY_break_line1: self.runCmd("thread select %d" % (idx + 1)) self.check_za_register(2, 3) self.runCmd("register write vg 4") + self.check_disabled_za_register(4) self.runCmd("thread continue 2") self.runCmd("thread continue 3") >From f00e970f03dea81fccae04ae5fe265d7721fb625 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 6 Oct 2023 16:29:28 +0100 Subject: [PATCH 2/2] Also use the invalidates mechanism to link vg and svg. --- .../Process/Utility/RegisterInfoPOSIX_arm64.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 60070819cb92699..e6e6c12d0404aed 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -374,6 +374,17 @@ void RegisterInfoPOSIX_arm64::AddRegSetSME() { std::make_pair(sme_regnum, m_dynamic_reg_infos.size()); m_dynamic_reg_sets.push_back(g_reg_set_sme_arm64); m_dynamic_reg_sets.back().registers = m_sme_regnum_collection.data(); + + // When vg is written during streaming mode, svg will also change, as vg and + // svg in this state are both showing the streaming vector length. + // We model this as vg invalidating svg. In non-streaming mode this doesn't + // happen but to keep things simple we will invalidate svg anyway. + // + // This must be added now, rather than when vg is defined because SME is a + // dynamic set that may or may not be present. 
+ static const uint32_t vg_invalidates[] = {sme_regnum + 1 /*svg*/, + LLDB_INVALID_REGNUM}; + m_dynamic_reg_infos[GetRegNumSMESVG()].invalidate_regs = vg_invalidates; } uint32_t RegisterInfoPOSIX_arm64::ConfigureVectorLengthSVE(uint32_t sve_vq) { From lldb-commits at lists.llvm.org Fri Oct 6 08:35:33 2023 From: lldb-commits at lists.llvm.org (David Spickett via lldb-commits) Date: Fri, 06 Oct 2023 08:35:33 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb][AArch64] Invalidate SVG prior to reconfiguring ZA regdef (PR #66768) In-Reply-To: Message-ID: <65202945.630a0220.d2c98.2388@mx.google.com> ================ @@ -783,6 +783,11 @@ void GDBRemoteRegisterContext::AArch64Reconfigure() { std::optional svg_reg_value; const RegisterInfo *svg_reg_info = m_reg_info_sp->GetRegisterInfo("svg"); if (svg_reg_info) { + // When vg is written it is automatically made invalid. Writing vg will also + // change svg if we're in streaming mode but it will not be made invalid + // so do this manually so the following read gets the latest svg value. ---------------- DavidSpickett wrote: From what I've seen there are two scenarios to handle: * Explicit write by the user to vg. * We have just stopped and need to get the potentially new svg and vg. The code as it was handled both, but is really using mechanisms meant to handle the second scenario. I have done what you suggest in a fixup commit, making vg invalidate svg. This will handle the first scenario specifically. If you approve of that I'll update the commit message prior to landing to explain that we have these two handlers for the two scenarios, and when/if they overlap, it is not a problem. All of this is pessimistic: we assume any change to vg in any mode will invalidate svg. I think this is fine though because reading svg again is cheap, and getting the vector lengths mixed would waste a lot of the user's time. https://github.com/llvm/llvm-project/pull/66768 From lldb-commits at lists.llvm.org Fri Oct 6 09:20:35 2023 From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits) Date: Fri, 06 Oct 2023 09:20:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150) In-Reply-To: Message-ID: <652033d3.170a0220.2de1a.d114@mx.google.com> https://github.com/walter-erquinigo updated https://github.com/llvm/llvm-project/pull/68150 >From 96c00272c96f546b57902f0f98b0af383dae24c5 Mon Sep 17 00:00:00 2001 From: walter erquinigo Date: Mon, 2 Oct 2023 16:56:16 -0400 Subject: [PATCH] [LLDB][NFC] Create a namespace for the DWARF plugin As a followup of https://github.com/llvm/llvm-project/pull/67851, I'm defining a new namespace `lldb_private::plugin::dwarf` for the classes in this Plugins/SymbolFile/DWARF folder. This change is very NFC and helped me with exporting the necessary symbols for my out-of-tree language plugin. The only two classes that I didn't change are DWARFDataExtractor, because that's being explicitly exported as part of lldb_private in `lldb-forward.h`, and the ClangDWARFASTParser, because that shouldn't be in the same namespace as the generic language-agnostic dwarf parser, but I'm okay with changing that. In any case, even if I didn't need this for my work, adding this namespace could be considered a good practice.
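As a rough sketch of the end state (illustrative, not an excerpt from the diff below), the plugin's types are declared inside the new namespace and any code outside it has to name them explicitly:

  // Inside the plugin headers:
  namespace lldb_private::plugin::dwarf {
  class DWARFUnit;
  } // namespace lldb_private::plugin::dwarf

  // Inside the plugin's .cpp files, a using-directive keeps the code terse:
  using namespace lldb_private::plugin::dwarf;

  // Generic code must spell the plugin type out, which makes accidental
  // dependencies on the plugin easy to spot in review:
  void VisitUnit(lldb_private::plugin::dwarf::DWARFUnit &unit);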
--- .../include/lldb/Expression/DWARFExpression.h | 24 +- .../lldb/Expression/DWARFExpressionList.h | 9 +- lldb/include/lldb/Symbol/TypeSystem.h | 8 +- lldb/source/Expression/DWARFExpression.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/AppleDWARFIndex.h | 35 +-- .../Plugins/SymbolFile/DWARF/DIERef.cpp | 1 + lldb/source/Plugins/SymbolFile/DWARF/DIERef.h | 7 +- .../SymbolFile/DWARF/DWARFASTParser.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 5 +- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 2 + .../SymbolFile/DWARF/DWARFASTParserClang.h | 207 +++++++++++------- .../SymbolFile/DWARF/DWARFAttribute.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFAttribute.h | 3 + .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp | 5 +- .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.h | 2 + .../SymbolFile/DWARF/DWARFCompileUnit.cpp | 1 + .../SymbolFile/DWARF/DWARFCompileUnit.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFContext.h | 48 ++-- .../Plugins/SymbolFile/DWARF/DWARFDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFDIE.h | 2 + .../SymbolFile/DWARF/DWARFDataExtractor.h | 2 +- .../SymbolFile/DWARF/DWARFDebugArangeSet.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugArangeSet.h | 2 + .../SymbolFile/DWARF/DWARFDebugAranges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugAranges.h | 2 + .../SymbolFile/DWARF/DWARFDebugInfo.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h | 12 +- .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugInfoEntry.h | 2 + .../SymbolFile/DWARF/DWARFDebugMacro.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugMacro.h | 6 +- .../SymbolFile/DWARF/DWARFDebugRanges.cpp | 1 + .../SymbolFile/DWARF/DWARFDebugRanges.h | 6 +- .../SymbolFile/DWARF/DWARFDeclContext.cpp | 1 + .../SymbolFile/DWARF/DWARFDeclContext.h | 2 + .../Plugins/SymbolFile/DWARF/DWARFDefines.cpp | 4 +- .../Plugins/SymbolFile/DWARF/DWARFDefines.h | 4 +- .../SymbolFile/DWARF/DWARFFormValue.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFFormValue.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFIndex.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFIndex.h | 43 ++-- .../SymbolFile/DWARF/DWARFTypeUnit.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h | 4 +- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 28 +-- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 17 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 3 +- .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 39 ++-- .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 1 + .../SymbolFile/DWARF/ManualDWARFIndex.h | 37 ++-- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 1 + .../Plugins/SymbolFile/DWARF/NameToDIE.h | 5 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 10 +- .../SymbolFile/DWARF/SymbolFileDWARF.h | 20 +- .../DWARF/SymbolFileDWARFDebugMap.cpp | 3 + .../DWARF/SymbolFileDWARFDebugMap.h | 13 +- .../SymbolFile/DWARF/SymbolFileDWARFDwo.cpp | 1 + .../SymbolFile/DWARF/SymbolFileDWARFDwo.h | 9 +- .../SymbolFile/DWARF/UniqueDWARFASTType.cpp | 1 + .../SymbolFile/DWARF/UniqueDWARFASTType.h | 5 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 1 + .../TypeSystem/Clang/TypeSystemClang.h | 2 +- 63 files changed, 403 insertions(+), 268 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 5e03f539a272cac..a4c389ff43eebae 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -18,7 +18,9 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include +namespace 
lldb_private::plugin::dwarf { class DWARFUnit; +} // namespace lldb_private::plugin::dwarf namespace lldb_private { @@ -64,18 +66,21 @@ class DWARFExpression { /// \return /// The address specified by the operation, if the operation exists, or /// LLDB_INVALID_ADDRESS otherwise. - lldb::addr_t GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, - bool &error) const; + lldb::addr_t + GetLocation_DW_OP_addr(const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu, + bool &error) const; - bool Update_DW_OP_addr(const DWARFUnit *dwarf_cu, lldb::addr_t file_addr); + bool Update_DW_OP_addr(const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu, + lldb::addr_t file_addr); void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size, uint8_t addr_byte_size); - bool ContainsThreadLocalStorage(const DWARFUnit *dwarf_cu) const; + bool ContainsThreadLocalStorage( + const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu) const; bool LinkThreadLocalStorage( - const DWARFUnit *dwarf_cu, + const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu, std::function const &link_address_callback); @@ -128,15 +133,16 @@ class DWARFExpression { /// details of the failure are provided through it. static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, lldb::ModuleSP module_sp, const DataExtractor &opcodes, - const DWARFUnit *dwarf_cu, + const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_set, const Value *initial_value_ptr, const Value *object_address_ptr, Value &result, Status *error_ptr); - static bool ParseDWARFLocationList(const DWARFUnit *dwarf_cu, - const DataExtractor &data, - DWARFExpressionList *loc_list); + static bool + ParseDWARFLocationList(const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu, + const DataExtractor &data, + DWARFExpressionList *loc_list); bool GetExpressionData(DataExtractor &data) const { data = m_data; diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h index c0939647056dcbf..930f44742b2b967 100644 --- a/lldb/include/lldb/Expression/DWARFExpressionList.h +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -13,7 +13,9 @@ #include "lldb/Utility/RangeMap.h" #include "lldb/lldb-private.h" +namespace lldb_private::plugin::dwarf { class DWARFUnit; +} // namespace lldb_private::plugin::dwarf namespace lldb_private { @@ -24,13 +26,14 @@ class DWARFExpressionList { public: DWARFExpressionList() = default; - DWARFExpressionList(lldb::ModuleSP module_sp, const DWARFUnit *dwarf_cu, + DWARFExpressionList(lldb::ModuleSP module_sp, + const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu, lldb::addr_t func_file_addr) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu), m_func_file_addr(func_file_addr) {} DWARFExpressionList(lldb::ModuleSP module_sp, DWARFExpression expr, - const DWARFUnit *dwarf_cu) + const lldb_private::plugin::dwarf::DWARFUnit *dwarf_cu) : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu) { AddExpression(0, LLDB_INVALID_ADDRESS, expr); } @@ -136,7 +139,7 @@ class DWARFExpressionList { /// The DWARF compile unit this expression belongs to. It is used to evaluate /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index, /// DW_OP_GNU_const_index) - const DWARFUnit *m_dwarf_cu = nullptr; + const lldb_private::plugin::dwarf::DWARFUnit *m_dwarf_cu = nullptr; // Function base file address. 
lldb::addr_t m_func_file_addr = LLDB_INVALID_ADDRESS; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index eb6e453e1aec0d0..1eeb9867d76c822 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -28,8 +28,11 @@ #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/lldb-private.h" +namespace lldb_private::plugin::dwarf { class DWARFDIE; class DWARFASTParser; +} // namespace lldb_private::plugin::dwarf + class PDBASTParser; namespace lldb_private { @@ -93,7 +96,10 @@ class TypeSystem : public PluginInterface, /// removing all the TypeSystems from the TypeSystemMap. virtual void Finalize() {} - virtual DWARFASTParser *GetDWARFParser() { return nullptr; } + virtual lldb_private::plugin::dwarf::DWARFASTParser *GetDWARFParser() { + return nullptr; + } + virtual PDBASTParser *GetPDBParser() { return nullptr; } virtual npdb::PdbAstBuilder *GetNativePDBParser() { return nullptr; } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 93fcf0579be0b18..fe4928d4f43a434 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -45,6 +45,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; // DWARFExpression constructor DWARFExpression::DWARFExpression() : m_data() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 34fb98b5a9b690a..325517ca1d2499b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; std::unique_ptr AppleDWARFIndex::Create( Module &module, DWARFDataExtractor apple_names, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h index 6b948e07989531e..d5fd834343d3f89 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h @@ -12,15 +12,18 @@ #include "Plugins/SymbolFile/DWARF/DWARFIndex.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -namespace lldb_private { +namespace lldb_private::plugin::dwarf { class AppleDWARFIndex : public DWARFIndex { public: static std::unique_ptr - Create(Module &module, DWARFDataExtractor apple_names, - DWARFDataExtractor apple_namespaces, DWARFDataExtractor apple_types, - DWARFDataExtractor apple_objc, DWARFDataExtractor debug_str); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor apple_names, + lldb_private::DWARFDataExtractor apple_namespaces, + lldb_private::DWARFDataExtractor apple_types, + lldb_private::DWARFDataExtractor apple_objc, + lldb_private::DWARFDataExtractor debug_str); - AppleDWARFIndex(Module &module, + AppleDWARFIndex(lldb_private::Module &module, std::unique_ptr apple_names, std::unique_ptr apple_namespaces, std::unique_ptr apple_types, @@ -33,33 +36,33 @@ class AppleDWARFIndex : public DWARFIndex { void Preload() override {} void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const 
lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: std::unique_ptr m_apple_names_up; @@ -77,6 +80,6 @@ class AppleDWARFIndex : public DWARFIndex { std::optional search_for_tag = std::nullopt, std::optional search_for_qualhash = std::nullopt); }; -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_APPLEDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp index 88a5e6027557b9b..163e9f4c081cff1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp @@ -14,6 +14,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, StringRef Style) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index b5a5cfe263f7804..63379c870524e4a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -14,6 +14,7 @@ #include #include +namespace lldb_private::plugin::dwarf { /// Identifies a DWARF debug info entry within a given Module. 
It contains three /// "coordinates": /// - file_index: identifies the separate stand alone debug info file @@ -131,10 +132,12 @@ class DIERef { static_assert(sizeof(DIERef) == 8); typedef std::vector DIEArray; +} // namespace lldb_private::plugin::dwarf namespace llvm { -template<> struct format_provider { - static void format(const DIERef &ref, raw_ostream &OS, StringRef Style); +template <> struct format_provider { + static void format(const lldb_private::plugin::dwarf::DIERef &ref, + raw_ostream &OS, StringRef Style); }; } // namespace llvm diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index a68b7cd110eb719..1fe0cadecc9e70e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -18,6 +18,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; std::optional DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 18825ae060b12fe..40f8e6d3f80932f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -17,11 +17,13 @@ #include "lldb/lldb-enumerations.h" #include -class DWARFDIE; namespace lldb_private { class CompileUnit; class ExecutionContext; } + +namespace lldb_private::plugin::dwarf { +class DWARFDIE; class SymbolFileDWARF; class DWARFASTParser { @@ -65,5 +67,6 @@ class DWARFASTParser { static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index d0065896b0d2292..545a5dcc7d0fd09 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -60,6 +60,8 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; + DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast) : m_ast(ast), m_die_to_decl_ctx(), m_decl_ctx_to_die() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 88bfc490e890744..0482ad6c0803721 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -31,45 +31,49 @@ namespace lldb_private { class CompileUnit; } +namespace lldb_private::plugin::dwarf { class DWARFDebugInfoEntry; class SymbolFileDWARF; +} // namespace lldb_private::plugin::dwarf struct ParsedDWARFTypeAttributes; -class DWARFASTParserClang : public DWARFASTParser { +class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { public: DWARFASTParserClang(lldb_private::TypeSystemClang &ast); ~DWARFASTParserClang() override; // DWARFASTParser interface. 
- lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - bool *type_is_new_ptr) override; + lldb::TypeSP + ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + bool *type_is_new_ptr) override; - lldb_private::ConstString - ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; + lldb_private::ConstString ConstructDemangledNameFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + CompleteTypeFromDWARF(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; - lldb_private::CompilerDecl - GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDecl GetDeclForUIDFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext - GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; - lldb_private::CompilerDeclContext - GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -85,9 +89,9 @@ class DWARFASTParserClang : public DWARFASTParser { /// DWARFFormValue with the bit width of the given integer type. /// Returns an error if the value in the DWARFFormValue does not fit /// into the given integer type or the integer type isn't supported. - llvm::Expected - ExtractIntFromFormValue(const lldb_private::CompilerType &int_type, - const DWARFFormValue &form_value) const; + llvm::Expected ExtractIntFromFormValue( + const lldb_private::CompilerType &int_type, + const lldb_private::plugin::dwarf::DWARFFormValue &form_value) const; /// Returns the template parameters of a class DWARFDIE as a string. /// @@ -99,8 +103,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \return A string, including surrounding '<>', of the template parameters. /// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. - lldb_private::ConstString - GetDIEClassTemplateParams(const DWARFDIE &die) override; + lldb_private::ConstString GetDIEClassTemplateParams( + const lldb_private::plugin::dwarf::DWARFDIE &die) override; protected: /// Protected typedefs and members. 
@@ -108,14 +112,19 @@ class DWARFASTParserClang : public DWARFASTParser { class DelayedAddObjCClassProperty; typedef std::vector DelayedPropertyList; - typedef llvm::DenseMap + typedef llvm::DenseMap< + const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, + clang::DeclContext *> DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; - typedef llvm::DenseMap + typedef llvm::DenseMap< + const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, + lldb_private::OptionalClangModuleID> DIEToModuleMap; - typedef llvm::DenseMap + typedef llvm::DenseMap< + const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, clang::Decl *> DIEToDeclMap; lldb_private::TypeSystemClang &m_ast; @@ -126,11 +135,14 @@ class DWARFASTParserClang : public DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); + clang::DeclContext * + GetDeclContextForBlock(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); + clang::BlockDecl * + ResolveBlockDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); + clang::NamespaceDecl * + ResolveNamespaceDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -141,82 +153,98 @@ class DWARFASTParserClang : public DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. - clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); + clang::NamespaceDecl *ResolveImportedDeclarationDIE( + const lldb_private::plugin::dwarf::DWARFDIE &die); - bool ParseTemplateDIE(const DWARFDIE &die, + bool ParseTemplateDIE(const lldb_private::plugin::dwarf::DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const DWARFDIE &parent_die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); + std::string + GetCPlusPlusQualifiedName(const lldb_private::plugin::dwarf::DWARFDIE &die); bool ParseChildMembers( - const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, + const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, + std::vector &member_function_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const DWARFDIE &parent_die, bool skip_artificial, - bool &is_static, bool &is_variadic, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + bool skip_artificial, bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, unsigned &type_quals); - size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, - bool is_signed, uint32_t enumerator_byte_size, - const DWARFDIE &parent_die); + size_t ParseChildEnumerators( + lldb_private::CompilerType &compiler_type, bool is_signed, + uint32_t enumerator_byte_size, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die); /// Parse a 
structure, class, or union type DIE. - lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP + ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); - lldb_private::Type *GetTypeForDIE(const DWARFDIE &die); + lldb_private::Type * + GetTypeForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); + clang::Decl * + GetClangDeclForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext * + GetClangDeclContextForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, - DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); + clang::DeclContext *GetClangDeclContextContainingDIE( + const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::plugin::dwarf::DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID + GetOwningClangModule(const lldb_private::plugin::dwarf::DWARFDIE &die); - bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, - const DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes( + const lldb_private::plugin::dwarf::DWARFDIE &src_class_die, + const lldb_private::plugin::dwarf::DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); + clang::DeclContext *GetCachedClangDeclContextForDIE( + const lldb_private::plugin::dwarf::DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, + const lldb_private::plugin::dwarf::DWARFDIE &die); - void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); + void LinkDeclToDIE(clang::Decl *decl, + const lldb_private::plugin::dwarf::DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. /// /// Returns \p type_sp. - lldb::TypeSP - UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); + lldb::TypeSP UpdateSymbolContextScopeForType( + const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. - lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - lldb_private::Log *log); + lldb::TypeSP + ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + lldb::ModuleSP + GetModuleForType(const lldb_private::plugin::dwarf::DWARFDIE &die); private: struct FieldInfo { @@ -268,33 +296,41 @@ class DWARFASTParserClang : public DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. 
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, - const lldb_private::CompilerType &class_clang_type, - DelayedPropertyList &delayed_properties); + void + ParseObjCProperty(const lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + const lldb_private::CompilerType &class_clang_type, + DelayedPropertyList &delayed_properties); void - ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, + ParseSingleMember(const lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, FieldInfo &last_field_info); - bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteRecordType(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, + bool CompleteEnumType(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb_private::Type *type, lldb_private::CompilerType &clang_type); - lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP + ParseTypeModifier(const lldb_private::SymbolContext &sc, + const lldb_private::plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const lldb_private::plugin::dwarf::DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const DWARFDIE &die, + lldb::TypeSP ParseArrayType(const lldb_private::plugin::dwarf::DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, - const ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP + ParsePointerToMemberType(const lldb_private::plugin::dwarf::DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. /// @@ -311,7 +347,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. 
void ParseInheritance( - const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -328,7 +365,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, + ParseRustVariantPart(lldb_private::plugin::dwarf::DWARFDIE &die, + const lldb_private::plugin::dwarf::DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -338,7 +376,8 @@ class DWARFASTParserClang : public DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + explicit ParsedDWARFTypeAttributes( + const lldb_private::plugin::dwarf::DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -355,12 +394,12 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; lldb_private::ConstString name; lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; + lldb_private::plugin::dwarf::DWARFDIE object_pointer; + lldb_private::plugin::dwarf::DWARFFormValue abstract_origin; + lldb_private::plugin::dwarf::DWARFFormValue containing_type; + lldb_private::plugin::dwarf::DWARFFormValue signature; + lldb_private::plugin::dwarf::DWARFFormValue specification; + lldb_private::plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional byte_size; size_t calling_convention = llvm::dwarf::DW_CC_normal; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp index 00b56537ae2b5fe..3d35775e081e341 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.cpp @@ -11,6 +11,7 @@ #include "DWARFDebugInfo.h" using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; DWARFAttributes::DWARFAttributes() : m_infos() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index 90e12fa024936d6..bd3c2aa5431efe2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include +namespace lldb_private::plugin::dwarf { class DWARFUnit; class DWARFAttribute { @@ -31,6 +32,7 @@ class DWARFAttribute { form = m_form; val = m_value; } + protected: dw_attr_t m_attr; dw_form_t m_form; @@ -72,5 +74,6 @@ class DWARFAttributes { typedef llvm::SmallVector collection; collection m_infos; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp index 37a917c3a7661eb..f8924ce639e9a5c 
100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp @@ -18,6 +18,7 @@ #include using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; std::optional DWARFBaseDIE::GetDIERef() const { if (!IsValid()) @@ -35,7 +36,7 @@ dw_tag_t DWARFBaseDIE::Tag() const { } const char *DWARFBaseDIE::GetTagAsCString() const { - return lldb_private::DW_TAG_value_to_name(Tag()); + return DW_TAG_value_to_name(Tag()); } const char *DWARFBaseDIE::GetAttributeValueAsString(const dw_attr_t attr, @@ -120,6 +121,7 @@ DWARFAttributes DWARFBaseDIE::GetAttributes(Recurse recurse) const { return DWARFAttributes(); } +namespace lldb_private::plugin::dwarf { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return lhs.GetDIE() == rhs.GetDIE() && lhs.GetCU() == rhs.GetCU(); } @@ -127,6 +129,7 @@ bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs) { return !(lhs == rhs); } +} // namespace lldb_private::plugin::dwarf const DWARFDataExtractor &DWARFBaseDIE::GetData() const { // Clients must check if this DIE is valid before calling this function. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h index 8bcf807ad163a60..bee026b905950e8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h @@ -15,6 +15,7 @@ #include "llvm/Support/Error.h" #include +namespace lldb_private::plugin::dwarf { class DIERef; class DWARFASTParser; class DWARFAttributes; @@ -124,5 +125,6 @@ class DWARFBaseDIE { bool operator==(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); bool operator!=(const DWARFBaseDIE &lhs, const DWARFBaseDIE &rhs); +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFBASEDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp index f839a59bf6c390b..ec4c297cf7e164c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp @@ -16,6 +16,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void DWARFCompileUnit::Dump(Stream *s) const { s->Format( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index 65debac4c7d9265..a55becc3037974c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -14,8 +14,9 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_private::plugin::dwarf { class DWARFCompileUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override; @@ -40,5 +41,6 @@ class DWARFCompileUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFCOMPILEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index f72dad88e157592..ee347036dbbc034 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace 
lldb_private::plugin::dwarf; static DWARFDataExtractor LoadSection(SectionList *section_list, SectionType section_type) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h index 7df776b5f514155..617ef936bd3a15b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h @@ -16,16 +16,16 @@ #include #include -namespace lldb_private { +namespace lldb_private::plugin::dwarf { class DWARFContext { private: - SectionList *m_main_section_list; - SectionList *m_dwo_section_list; + lldb_private::SectionList *m_main_section_list; + lldb_private::SectionList *m_dwo_section_list; mutable std::unique_ptr m_llvm_context; struct SectionData { llvm::once_flag flag; - DWARFDataExtractor data; + lldb_private::DWARFDataExtractor data; }; SectionData m_data_debug_abbrev; @@ -45,39 +45,39 @@ class DWARFContext { SectionData m_data_debug_tu_index; SectionData m_data_debug_types; - const DWARFDataExtractor & + const lldb_private::DWARFDataExtractor & LoadOrGetSection(std::optional main_section_type, std::optional dwo_section_type, SectionData &data); - const DWARFDataExtractor &getOrLoadCuIndexData(); - const DWARFDataExtractor &getOrLoadTuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadCuIndexData(); + const lldb_private::DWARFDataExtractor &getOrLoadTuIndexData(); public: - explicit DWARFContext(SectionList *main_section_list, - SectionList *dwo_section_list) + explicit DWARFContext(lldb_private::SectionList *main_section_list, + lldb_private::SectionList *dwo_section_list) : m_main_section_list(main_section_list), m_dwo_section_list(dwo_section_list) {} - const DWARFDataExtractor &getOrLoadAbbrevData(); - const DWARFDataExtractor &getOrLoadAddrData(); - const DWARFDataExtractor &getOrLoadArangesData(); - const DWARFDataExtractor &getOrLoadDebugInfoData(); - const DWARFDataExtractor &getOrLoadLineData(); - const DWARFDataExtractor &getOrLoadLineStrData(); - const DWARFDataExtractor &getOrLoadLocData(); - const DWARFDataExtractor &getOrLoadLocListsData(); - const DWARFDataExtractor &getOrLoadMacroData(); - const DWARFDataExtractor &getOrLoadRangesData(); - const DWARFDataExtractor &getOrLoadRngListsData(); - const DWARFDataExtractor &getOrLoadStrData(); - const DWARFDataExtractor &getOrLoadStrOffsetsData(); - const DWARFDataExtractor &getOrLoadDebugTypesData(); + const lldb_private::DWARFDataExtractor &getOrLoadAbbrevData(); + const lldb_private::DWARFDataExtractor &getOrLoadAddrData(); + const lldb_private::DWARFDataExtractor &getOrLoadArangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugInfoData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineData(); + const lldb_private::DWARFDataExtractor &getOrLoadLineStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocData(); + const lldb_private::DWARFDataExtractor &getOrLoadLocListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadMacroData(); + const lldb_private::DWARFDataExtractor &getOrLoadRangesData(); + const lldb_private::DWARFDataExtractor &getOrLoadRngListsData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrData(); + const lldb_private::DWARFDataExtractor &getOrLoadStrOffsetsData(); + const lldb_private::DWARFDataExtractor &getOrLoadDebugTypesData(); bool isDwo() { return m_dwo_section_list != nullptr; } llvm::DWARFContext &GetAsLLVM(); }; -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf #endif diff --git 
a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index b31c5dcac91851d..d43c2ac276fb819 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; namespace { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 031ea26ad405094..ba487917efddaec 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/iterator_range.h" +namespace lldb_private::plugin::dwarf { class DWARFDIE : public DWARFBaseDIE { public: class child_iterator; @@ -126,5 +127,6 @@ class DWARFDIE::child_iterator return *this; } }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h index b9526b079c1e9da..41b8e9ad0217b69 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.h @@ -33,6 +33,6 @@ class DWARFDataExtractor : public DataExtractor { llvm::DWARFDataExtractor GetAsLLVMDWARF() const; llvm::DataExtractor GetAsLLVM() const; }; -} +} // namespace lldb_private #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDATAEXTRACTOR_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp index 03cbfd28ae7413a..8461b94abca630d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -13,6 +13,7 @@ #include using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; DWARFDebugArangeSet::DWARFDebugArangeSet() : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h index 3c8633eaa3cce85..bc01adb262dd1be 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_private::plugin::dwarf { class DWARFDebugArangeSet { public: struct Header { @@ -62,5 +63,6 @@ class DWARFDebugArangeSet { Header m_header; DescriptorColl m_arange_descriptors; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index b38dd2b88c9d0b0..da73891f666548b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; // Constructor DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h index 5ff37e400c88403..c935be2793b876b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h @@ -13,6 +13,7 @@ #include "lldb/Utility/RangeMap.h" #include "llvm/Support/Error.h" +namespace lldb_private::plugin::dwarf { class DWARFDebugAranges { protected: typedef lldb_private::RangeDataVector @@ -50,5 +51,6 @@ class DWARFDebugAranges { protected: RangeToDIE m_aranges; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp index 9a33d6338b87d3e..553b6a4c551d205 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp @@ -27,10 +27,10 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; // Constructor -DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context) +DWARFDebugInfo::DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context) : m_dwarf(dwarf), m_context(context), m_units(), m_cu_aranges_up() {} const DWARFDebugAranges &DWARFDebugInfo::GetCompileUnitAranges() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index c990ac9fbe58310..3d3575f8e40875d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -19,20 +19,17 @@ #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" -namespace lldb_private { +namespace lldb_private::plugin::dwarf { class DWARFContext; -} class DWARFDebugInfo { public: - typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, - DWARFUnit *cu, + typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, DWARFUnit *cu, DWARFDebugInfoEntry *die, const dw_offset_t next_offset, const uint32_t depth, void *userData); - explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, - lldb_private::DWARFContext &context); + explicit DWARFDebugInfo(SymbolFileDWARF &dwarf, DWARFContext &context); size_t GetNumUnits(); DWARFUnit *GetUnitAtIndex(size_t idx); @@ -58,7 +55,7 @@ class DWARFDebugInfo { typedef std::vector UnitColl; SymbolFileDWARF &m_dwarf; - lldb_private::DWARFContext &m_context; + DWARFContext &m_context; llvm::once_flag m_units_once_flag; UnitColl m_units; @@ -80,5 +77,6 @@ class DWARFDebugInfo { DWARFDebugInfo(const DWARFDebugInfo &) = delete; const DWARFDebugInfo &operator=(const DWARFDebugInfo &) = delete; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index a6ab83700904cb9..a18836e5d9bbbb4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -35,6 +35,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; extern int g_verbose; // Extract a debug info entry for a given DWARFUnit from the data diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index 29db44a16bb1281..45052b62c75a9f2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -22,6 +22,7 @@ #include 
"llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +namespace lldb_private::plugin::dwarf { class DWARFDeclContext; #define DIE_SIBLING_IDX_BITSIZE 31 @@ -190,5 +191,6 @@ class DWARFDebugInfoEntry { void GetAttributes(DWARFUnit *cu, DWARFAttributes &attrs, Recurse recurse, uint32_t curr_depth) const; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGINFOENTRY_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp index 19c6448c4e74a06..2cd84bc55b7519f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp @@ -15,6 +15,7 @@ using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; DWARFDebugMacroHeader DWARFDebugMacroHeader::ParseHeader(const DWARFDataExtractor &debug_macro_data, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h index cbf762458331bcd..73141a288a2d80d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h @@ -17,11 +17,10 @@ #include "lldb/lldb-types.h" namespace lldb_private { - class DWARFDataExtractor; +} -} // namespace lldb_private - +namespace lldb_private::plugin::dwarf { class SymbolFileDWARF; class DWARFDebugMacroHeader { @@ -57,5 +56,6 @@ class DWARFDebugMacroEntry { SymbolFileDWARF *sym_file_dwarf, lldb_private::DebugMacrosSP &debug_macros_sp); }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGMACRO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp index 0b5bb23a4981f89..fd8f4e12ff770cc 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp @@ -11,6 +11,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; DWARFDebugRanges::DWARFDebugRanges() : m_range_map() {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h index 2e06cd5daf6f32d..cab6575c9b17444 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h @@ -12,21 +12,21 @@ #include "lldb/Core/dwarf.h" #include +namespace lldb_private::plugin::dwarf { class DWARFUnit; -namespace lldb_private { class DWARFContext; -} class DWARFDebugRanges { public: DWARFDebugRanges(); - void Extract(lldb_private::DWARFContext &context); + void Extract(DWARFContext &context); DWARFRangeList FindRanges(const DWARFUnit *cu, dw_offset_t debug_ranges_offset) const; protected: std::map m_range_map; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGRANGES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp index 393de0038e651f3..44e76022790130c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp @@ -9,6 +9,7 @@ #include "DWARFDeclContext.h" using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; const char *DWARFDeclContext::GetQualifiedName() const 
{ if (m_qualified_name.empty()) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h index 13e3dfb70c0cc80..6fbd50d84a6cb4c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h @@ -16,6 +16,7 @@ #include #include +namespace lldb_private::plugin::dwarf { // DWARFDeclContext // // A class that represents a declaration context all the way down to a @@ -82,5 +83,6 @@ class DWARFDeclContext { collection m_entries; mutable std::string m_qualified_name; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDECLCONTEXT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp index 4e99a295ce50f7d..941681cf8ece4c6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp @@ -12,7 +12,7 @@ #include #include -namespace lldb_private { +namespace lldb_private::plugin::dwarf { const char *DW_TAG_value_to_name(uint32_t val) { static char invalid[100]; @@ -88,4 +88,4 @@ const char *DW_LNS_value_to_name(uint32_t val) { return llvmstr.data(); } -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h index 2afdbb47381a9cd..5242eefbb62effb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h @@ -12,7 +12,7 @@ #include "lldb/Core/dwarf.h" #include -namespace lldb_private { +namespace lldb_private::plugin::dwarf { typedef uint32_t DRC_class; // Holds DRC_* class bitfields @@ -30,6 +30,6 @@ const char *DW_LANG_value_to_name(uint32_t val); const char *DW_LNS_value_to_name(uint32_t val); -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEFINES_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index 6ca17dcf47ff7ba..0a7029a55c047bb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -22,6 +22,7 @@ class DWARFUnit; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index 2a8843c1a0d45df..259ec6b2cfc1ad8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -13,6 +13,7 @@ #include #include +namespace lldb_private::plugin::dwarf { class DWARFUnit; class SymbolFileDWARF; class DWARFDIE; @@ -84,7 +85,8 @@ class DWARFFormValue { // It may be different from compile unit where m_value refers to. 
const DWARFUnit *m_unit = nullptr; // Unit for this form dw_form_t m_form = dw_form_t(0); // Form for this value - ValueType m_value; // Contains all data for the form + ValueType m_value; // Contains all data for the form }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index 779b52481b856d8..b1c323b101cef3a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -17,6 +17,7 @@ using namespace lldb_private; using namespace lldb; +using namespace lldb_private::plugin::dwarf; DWARFIndex::~DWARFIndex() = default; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 13fe96dae2aa1d6..d087651d20abdb2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -17,13 +17,13 @@ #include "lldb/Core/Module.h" #include "lldb/Target/Statistics.h" +namespace lldb_private::plugin::dwarf { class DWARFDeclContext; class DWARFDIE; -namespace lldb_private { class DWARFIndex { public: - DWARFIndex(Module &module) : m_module(module) {} + DWARFIndex(lldb_private::Module &module) : m_module(module) {} virtual ~DWARFIndex(); virtual void Preload() = 0; @@ -32,53 +32,56 @@ class DWARFIndex { /// (e.g., to only retrieve variables from a given context) should be done by /// the consumer. virtual void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) = 0; virtual void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) = 0; /// \a cu must be the skeleton unit if possible, not GetNonSkeletonUnit(). 
virtual void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) = 0; virtual void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) = 0; virtual void - GetCompleteObjCClass(ConstString class_name, bool must_be_implementation, + GetCompleteObjCClass(lldb_private::ConstString class_name, + bool must_be_implementation, llvm::function_ref callback) = 0; - virtual void GetTypes(ConstString name, + virtual void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) = 0; virtual void - GetNamespaces(ConstString name, + GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) = 0; virtual void - GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, + SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) = 0; virtual void - GetFunctions(const RegularExpression ®ex, + GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) = 0; - virtual void Dump(Stream &s) = 0; + virtual void Dump(lldb_private::Stream &s) = 0; - StatsDuration::Duration GetIndexTime() { return m_index_time; } + lldb_private::StatsDuration::Duration GetIndexTime() { return m_index_time; } protected: - Module &m_module; - StatsDuration m_index_time; + lldb_private::Module &m_module; + lldb_private::StatsDuration m_index_time; /// Helper function implementing common logic for processing function dies. If /// the function given by "ref" matches search criteria given by /// "parent_decl_ctx" and "name_type_mask", it is inserted into the "dies" /// vector. 
- bool ProcessFunctionDIE(const Module::LookupInfo &lookup_info, DIERef ref, - SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, - llvm::function_ref callback); + bool + ProcessFunctionDIE(const lldb_private::Module::LookupInfo &lookup_info, + DIERef ref, SymbolFileDWARF &dwarf, + const lldb_private::CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback); class DIERefCallbackImpl { public: @@ -102,6 +105,6 @@ class DWARFIndex { void ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const; }; -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp index 87af7177ca95ee9..4f3a3f54465371f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp @@ -13,6 +13,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void DWARFTypeUnit::Dump(Stream *s) const { s->Format("{0:x16}: Type Unit: length = {1:x8}, version = {2:x4}, " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 5d939582a312e98..40157c7e5d5447b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -14,8 +14,9 @@ namespace llvm { class DWARFAbbreviationDeclarationSet; -} +} // namespace llvm +namespace lldb_private::plugin::dwarf { class DWARFTypeUnit : public DWARFUnit { public: void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {} @@ -37,5 +38,6 @@ class DWARFTypeUnit : public DWARFUnit { friend class DWARFUnit; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFTYPEUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index a09c68087c47659..6f771c66a725cf3 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -28,6 +28,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; extern int g_verbose; @@ -201,8 +202,8 @@ DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs) rhs.m_cu = nullptr; } -DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=( - DWARFUnit::ScopedExtractDIEs &&rhs) { +DWARFUnit::ScopedExtractDIEs & +DWARFUnit::ScopedExtractDIEs::operator=(DWARFUnit::ScopedExtractDIEs &&rhs) { m_cu = rhs.m_cu; rhs.m_cu = nullptr; m_clear_dies = rhs.m_clear_dies; @@ -311,9 +312,9 @@ void DWARFUnit::ExtractDIEsRWLocked() { } if (!m_die_array.empty()) { - // The last die cannot have children (if it did, it wouldn't be the last one). - // This only makes a difference for malformed dwarf that does not have a - // terminating null die. + // The last die cannot have children (if it did, it wouldn't be the last + // one). This only makes a difference for malformed dwarf that does not have + // a terminating null die. 
m_die_array.back().SetHasChildren(false); if (m_first_die) { @@ -720,7 +721,7 @@ void DWARFUnit::ParseProducerInfo() { llvm::SmallVector matches; if (g_swiftlang_version_regex.Execute(producer, &matches)) { - m_producer_version.tryParse(matches[1]); + m_producer_version.tryParse(matches[1]); m_producer = eProducerSwift; } else if (producer.contains("clang")) { if (g_clang_version_regex.Execute(producer, &matches)) @@ -905,9 +906,10 @@ llvm::Error DWARFUnitHeader::ApplyIndexEntry( return llvm::Error::success(); } -llvm::Expected DWARFUnitHeader::extract( - const DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &context, lldb::offset_t *offset_ptr) { +llvm::Expected +DWARFUnitHeader::extract(const DWARFDataExtractor &data, + DIERef::Section section, DWARFContext &context, + lldb::offset_t *offset_ptr) { DWARFUnitHeader header; header.m_offset = *offset_ptr; header.m_length = data.GetDWARFInitialLength(offset_ptr); @@ -1086,22 +1088,20 @@ DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) { return ranges; } -llvm::Expected -DWARFUnit::FindRnglistFromIndex(uint32_t index) { +llvm::Expected DWARFUnit::FindRnglistFromIndex(uint32_t index) { llvm::Expected maybe_offset = GetRnglistOffset(index); if (!maybe_offset) return maybe_offset.takeError(); return FindRnglistFromOffset(*maybe_offset); } - bool DWARFUnit::HasAny(llvm::ArrayRef tags) { ExtractUnitDIEIfNeeded(); if (m_dwo) return m_dwo->HasAny(tags); - for (const auto &die: m_die_array) { - for (const auto tag: tags) { + for (const auto &die : m_die_array) { + for (const auto tag : tags) { if (tag == die.Tag()) return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 20871d805e77a87..a8d9dbfe760334c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -19,6 +19,7 @@ #include #include +namespace lldb_private::plugin::dwarf { class DWARFUnit; class DWARFCompileUnit; class NameToDIE; @@ -80,8 +81,7 @@ class DWARFUnitHeader { static llvm::Expected extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section, - lldb_private::DWARFContext &dwarf_context, - lldb::offset_t *offset_ptr); + DWARFContext &dwarf_context, lldb::offset_t *offset_ptr); }; class DWARFUnit : public lldb_private::UserID { @@ -104,6 +104,7 @@ class DWARFUnit : public lldb_private::UserID { class ScopedExtractDIEs { DWARFUnit *m_cu; + public: bool m_clear_dies = false; ScopedExtractDIEs(DWARFUnit &cu); @@ -227,7 +228,9 @@ class DWARFUnit : public lldb_private::UserID { uint8_t GetUnitType() const { return m_header.GetUnitType(); } bool IsTypeUnit() const { return m_header.IsTypeUnit(); } /// Note that this check only works for DWARF5+. - bool IsSkeletonUnit() const { return GetUnitType() == llvm::dwarf::DW_UT_skeleton; } + bool IsSkeletonUnit() const { + return GetUnitType() == llvm::dwarf::DW_UT_skeleton; + } std::optional GetStringOffsetSectionItem(uint32_t index) const; @@ -272,7 +275,6 @@ class DWARFUnit : public lldb_private::UserID { /// True if any DIEs match any tag in \a tags, false otherwise. bool HasAny(llvm::ArrayRef tags); - /// Get the fission .dwo file specific error for this compile unit. /// /// The skeleton compile unit only can have a DWO error. 
Any other type @@ -351,9 +353,9 @@ class DWARFUnit : public lldb_private::UserID { lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate; std::optional m_comp_dir; std::optional m_file_spec; - std::optional m_addr_base; ///< Value of DW_AT_addr_base. - dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. - dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. + std::optional m_addr_base; ///< Value of DW_AT_addr_base. + dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base. + dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base. std::optional m_gnu_addr_base; std::optional m_gnu_ranges_base; @@ -390,5 +392,6 @@ class DWARFUnit : public lldb_private::UserID { DWARFUnit(const DWARFUnit &) = delete; const DWARFUnit &operator=(const DWARFUnit &) = delete; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFUNIT_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index af2d6c554140bc0..292ea2806c59dc7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -18,6 +18,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; llvm::Expected> DebugNamesDWARFIndex::Create(Module &module, DWARFDataExtractor debug_names, @@ -227,7 +228,7 @@ void DebugNamesDWARFIndex::GetNamespaces( ConstString name, llvm::function_ref callback) { for (const DebugNames::Entry &entry : m_debug_names_up->equal_range(name.GetStringRef())) { - dwarf::Tag entry_tag = entry.tag(); + lldb_private::dwarf::Tag entry_tag = entry.tag(); if (entry_tag == DW_TAG_namespace || entry_tag == DW_TAG_imported_declaration) { if (!ProcessEntry(entry, callback)) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index abbd700f1603feb..a8715c0ee0858b6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -17,50 +17,51 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include -namespace lldb_private { +namespace lldb_private::plugin::dwarf { class DebugNamesDWARFIndex : public DWARFIndex { public: static llvm::Expected> - Create(Module &module, DWARFDataExtractor debug_names, - DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); + Create(lldb_private::Module &module, + lldb_private::DWARFDataExtractor debug_names, + lldb_private::DWARFDataExtractor debug_str, SymbolFileDWARF &dwarf); void Preload() override { m_fallback.Preload(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &cu, llvm::function_ref callback) override; void - GetObjCMethods(ConstString class_name, + GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override {} void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void 
GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; private: - DebugNamesDWARFIndex(Module &module, + DebugNamesDWARFIndex(lldb_private::Module &module, std::unique_ptr debug_names_up, - DWARFDataExtractor debug_names_data, - DWARFDataExtractor debug_str_data, + lldb_private::DWARFDataExtractor debug_names_data, + lldb_private::DWARFDataExtractor debug_str_data, SymbolFileDWARF &dwarf) : DWARFIndex(module), m_debug_info(dwarf.DebugInfo()), m_debug_names_data(debug_names_data), m_debug_str_data(debug_str_data), @@ -71,8 +72,8 @@ class DebugNamesDWARFIndex : public DWARFIndex { // LLVM DWARFDebugNames will hold a non-owning reference to this data, so keep // track of the ownership here. - DWARFDataExtractor m_debug_names_data; - DWARFDataExtractor m_debug_str_data; + lldb_private::DWARFDataExtractor m_debug_names_data; + lldb_private::DWARFDataExtractor m_debug_str_data; using DebugNames = llvm::DWARFDebugNames; std::unique_ptr m_debug_names_up; @@ -89,6 +90,6 @@ class DebugNamesDWARFIndex : public DWARFIndex { static llvm::DenseSet GetUnits(const DebugNames &debug_names); }; -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DEBUGNAMESDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 90f18c96afa230e..16ff5f7d4842cae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -28,6 +28,7 @@ using namespace lldb_private; using namespace lldb; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; void ManualDWARFIndex::Index() { if (m_indexed) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index d95cf501face8e4..7a54b2d78c24c8c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -13,13 +13,13 @@ #include "Plugins/SymbolFile/DWARF/NameToDIE.h" #include "llvm/ADT/DenseSet.h" +namespace lldb_private::plugin::dwarf { class DWARFDebugInfo; class SymbolFileDWARFDwo; -namespace lldb_private { class ManualDWARFIndex : public DWARFIndex { public: - ManualDWARFIndex(Module &module, SymbolFileDWARF &dwarf, + ManualDWARFIndex(lldb_private::Module &module, SymbolFileDWARF &dwarf, llvm::DenseSet units_to_avoid = {}) : DWARFIndex(module), m_dwarf(&dwarf), m_units_to_avoid(std::move(units_to_avoid)) {} @@ -27,33 +27,33 @@ class ManualDWARFIndex : public DWARFIndex { void Preload() override { Index(); } void - GetGlobalVariables(ConstString basename, + GetGlobalVariables(lldb_private::ConstString basename, 
llvm::function_ref callback) override; void - GetGlobalVariables(const RegularExpression ®ex, + GetGlobalVariables(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; void GetGlobalVariables(DWARFUnit &unit, llvm::function_ref callback) override; - void GetObjCMethods(ConstString class_name, + void GetObjCMethods(lldb_private::ConstString class_name, llvm::function_ref callback) override; void GetCompleteObjCClass( - ConstString class_name, bool must_be_implementation, + lldb_private::ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; - void GetTypes(ConstString name, + void GetTypes(lldb_private::ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, llvm::function_ref callback) override; - void GetNamespaces(ConstString name, + void GetNamespaces(lldb_private::ConstString name, llvm::function_ref callback) override; - void GetFunctions(const Module::LookupInfo &lookup_info, + void GetFunctions(const lldb_private::Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, - const CompilerDeclContext &parent_decl_ctx, + const lldb_private::CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) override; - void GetFunctions(const RegularExpression ®ex, + void GetFunctions(const lldb_private::RegularExpression ®ex, llvm::function_ref callback) override; - void Dump(Stream &s) override; + void Dump(lldb_private::Stream &s) override; // Make IndexSet public so we can unit test the encoding and decoding logic. struct IndexSet { @@ -65,8 +65,9 @@ class ManualDWARFIndex : public DWARFIndex { NameToDIE globals; NameToDIE types; NameToDIE namespaces; - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); - void Encode(DataEncoder &encoder) const; + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr); + void Encode(lldb_private::DataEncoder &encoder) const; bool operator==(const IndexSet &rhs) const { return function_basenames == rhs.function_basenames && function_fullnames == rhs.function_fullnames && @@ -94,8 +95,8 @@ class ManualDWARFIndex : public DWARFIndex { /// All strings in cache files are put into string tables for efficiency /// and cache file size reduction. Strings are stored as uint32_t string /// table offsets in the cache data. - bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, - bool &signature_mismatch); + bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr, bool &signature_mismatch); /// Encode this object into a data encoder object. /// @@ -112,7 +113,7 @@ class ManualDWARFIndex : public DWARFIndex { /// \return /// True if the symbol table's object file can generate a valid signature /// and all data for the symbol table was encoded, false otherwise. - bool Encode(DataEncoder &encoder) const; + bool Encode(lldb_private::DataEncoder &encoder) const; /// Get the cache key string for this symbol table. 
/// @@ -173,6 +174,6 @@ class ManualDWARFIndex : public DWARFIndex { IndexSet m_set; bool m_indexed = false; }; -} // namespace lldb_private +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 89e628f5eaf1c47..44d90648700cfb8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -20,6 +20,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; void NameToDIE::Finalize() { m_map.Sort(std::less()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 61df1a628ab5913..71f21f2945421c9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -16,6 +16,7 @@ #include "lldb/Core/dwarf.h" #include "lldb/lldb-defines.h" +namespace lldb_private::plugin::dwarf { class DWARFUnit; class NameToDIE { @@ -45,8 +46,7 @@ class NameToDIE { void ForEach(std::function const - &callback) const; + const DIERef &die_ref)> const &callback) const; /// Decode a serialized version of this object from data. /// @@ -89,5 +89,6 @@ class NameToDIE { protected: lldb_private::UniqueCStringMap m_map; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e472074545a6f07..d5b2b9381fc4202 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -97,6 +97,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; LLDB_PLUGIN_DEFINE(SymbolFileDWARF) @@ -136,9 +137,8 @@ static PluginProperties &GetGlobalPluginProperties() { } static const llvm::DWARFDebugLine::LineTable * -ParseLLVMLineTable(lldb_private::DWARFContext &context, - llvm::DWARFDebugLine &line, dw_offset_t line_offset, - dw_offset_t unit_offset) { +ParseLLVMLineTable(DWARFContext &context, llvm::DWARFDebugLine &line, + dw_offset_t line_offset, dw_offset_t unit_offset) { Log *log = GetLog(DWARFLog::DebugInfo); llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVMDWARF(); @@ -159,7 +159,7 @@ ParseLLVMLineTable(lldb_private::DWARFContext &context, return *line_table; } -static bool ParseLLVMLineTablePrologue(lldb_private::DWARFContext &context, +static bool ParseLLVMLineTablePrologue(DWARFContext &context, llvm::DWARFDebugLine::Prologue &prologue, dw_offset_t line_offset, dw_offset_t unit_offset) { @@ -2428,7 +2428,7 @@ bool SymbolFileDWARF::DIEInDeclContext(const CompilerDeclContext &decl_ctx, // ...But if we are only checking root decl contexts, confirm that the // 'die' is a top-level context. 
if (only_root_namespaces) - return die.GetParent().Tag() == dwarf::DW_TAG_compile_unit; + return die.GetParent().Tag() == llvm::dwarf::DW_TAG_compile_unit; return true; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 5aaf8bd270ef7b1..87a82d12105a223 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -38,6 +38,13 @@ #include "DWARFIndex.h" #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace llvm { +class DWARFDebugAbbrev; +} // namespace llvm + +namespace lldb_private::plugin::dwarf { // Forward Declarations for this DWARF plugin class DebugMapModule; class DWARFCompileUnit; @@ -54,10 +61,6 @@ class SymbolFileDWARFDwo; class SymbolFileDWARFDwp; class UserID; -namespace llvm { -class DWARFDebugAbbrev; -} - #define DIE_IS_BEING_PARSED ((lldb_private::Type *)1) class SymbolFileDWARF : public lldb_private::SymbolFileCommon { @@ -78,7 +81,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; // Static Functions static void Initialize(); @@ -285,7 +288,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { void DumpClangAST(lldb_private::Stream &s) override; - lldb_private::DWARFContext &GetDWARFContext() { return m_context; } + DWARFContext &GetDWARFContext() { return m_context; } const std::shared_ptr &GetDwpSymbolFile(); @@ -534,7 +537,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { llvm::once_flag m_dwp_symfile_once_flag; std::shared_ptr m_dwp_symfile; - lldb_private::DWARFContext m_context; + DWARFContext m_context; llvm::once_flag m_info_once_flag; std::unique_ptr m_info; @@ -547,7 +550,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { DebugMacrosMap m_debug_macros_map; ExternalTypeModuleMap m_external_type_modules; - std::unique_ptr m_index; + std::unique_ptr m_index; bool m_fetched_external_modules : 1; lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; @@ -578,5 +581,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFileCommon { /// an index that identifies the .DWO or .o file. 
std::optional m_file_index; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARF_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index eadedd32e1a4aaf..86ae2a1c07682f7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -42,6 +42,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; char SymbolFileDWARFDebugMap::ID; @@ -166,6 +167,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap( return file_range_map; } +namespace lldb_private::plugin::dwarf { class DebugMapModule : public Module { public: DebugMapModule(const ModuleSP &exe_module_sp, uint32_t cu_idx, @@ -222,6 +224,7 @@ class DebugMapModule : public Module { ModuleWP m_exe_module_wp; const uint32_t m_cu_idx; }; +} // namespace lldb_private::plugin::dwarf void SymbolFileDWARFDebugMap::Initialize() { PluginManager::RegisterPlugin(GetPluginNameStatic(), diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index 881fd4c45ff05a0..6e67b551698c779 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -20,6 +20,9 @@ #include "UniqueDWARFASTType.h" +class DWARFASTParserClang; + +namespace lldb_private::plugin::dwarf { class SymbolFileDWARF; class DWARFCompileUnit; class DWARFDebugAranges; @@ -161,7 +164,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { enum { kHaveInitializedOSOs = (1 << 0), kNumFlags }; friend class DebugMapModule; - friend class DWARFASTParserClang; + friend class ::DWARFASTParserClang; friend class DWARFCompileUnit; friend class SymbolFileDWARF; struct OSOInfo { @@ -296,9 +299,10 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { bool Supports_DW_AT_APPLE_objc_complete_type(SymbolFileDWARF *skip_dwarf_oso); - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation); + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation); UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; @@ -403,5 +407,6 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFileCommon { size_t AddOSOARanges(SymbolFileDWARF *dwarf2Data, DWARFDebugAranges *debug_aranges); }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDEBUGMAP_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 78c3c19684e116d..60313ca3a0f7b25 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -21,6 +21,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; char SymbolFileDWARFDwo::ID; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index e98ea49d939baf0..dc046c9f7c991f7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -12,6 +12,7 @@ #include "SymbolFileDWARF.h" #include +namespace lldb_private::plugin::dwarf { class SymbolFileDWARFDwo : public SymbolFileDWARF { /// LLVM RTTI support. static char ID; @@ -65,9 +66,10 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { lldb::TypeSP FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) override; - lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( - const DWARFDIE &die, lldb_private::ConstString type_name, - bool must_be_implementation) override; + lldb::TypeSP + FindCompleteObjCDefinitionTypeForDIE(const DWARFDIE &die, + lldb_private::ConstString type_name, + bool must_be_implementation) override; SymbolFileDWARF &GetBaseSymbolFile() const { return m_base_symbol_file; } @@ -77,5 +79,6 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { SymbolFileDWARF &m_base_symbol_file; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARFDWO_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 22a921cf61389bc..223518f0ae82418 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -11,6 +11,7 @@ #include "lldb/Core/Declaration.h" using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, const lldb_private::Declaration &decl, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index 0947d1e581c5237..bb1d9157f06a248 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ -16,6 +16,7 @@ #include "DWARFDIE.h" #include "lldb/Core/Declaration.h" +namespace lldb_private::plugin::dwarf { class UniqueDWARFASTType { public: // Constructors and Destructors @@ -74,8 +75,7 @@ class UniqueDWARFASTTypeMap { ~UniqueDWARFASTTypeMap() = default; - void Insert(lldb_private::ConstString name, - const UniqueDWARFASTType &entry) { + void Insert(lldb_private::ConstString name, const UniqueDWARFASTType &entry) { m_collection[name.GetCString()].Append(entry); } @@ -95,5 +95,6 @@ class UniqueDWARFASTTypeMap { typedef llvm::DenseMap collection; collection m_collection; }; +} // namespace lldb_private::plugin::dwarf #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_UNIQUEDWARFASTTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 69cff0f35ae4ab2..d6a3d067b597ffc 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -86,6 +86,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; +using namespace lldb_private::plugin::dwarf; using namespace clang; using llvm::StringSwitch; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 0544de3cd33befb..7fe39391f254fe6 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -514,7 +514,7 @@ class TypeSystemClang : public TypeSystem { size_t bit_size); // TypeSystem methods - DWARFASTParser *GetDWARFParser() override; + lldb_private::plugin::dwarf::DWARFASTParser 
*GetDWARFParser() override;
  PDBASTParser *GetPDBParser() override;
  npdb::PdbAstBuilder *GetNativePDBParser() override;

From lldb-commits at lists.llvm.org  Fri Oct  6 09:21:40 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Fri, 06 Oct 2023 09:21:40 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150)
In-Reply-To: 
Message-ID: <65203414.170a0220.40d4a.d189@mx.google.com>

https://github.com/walter-erquinigo edited https://github.com/llvm/llvm-project/pull/68150

From lldb-commits at lists.llvm.org  Fri Oct  6 09:21:49 2023
From: lldb-commits at lists.llvm.org (Walter Erquinigo via lldb-commits)
Date: Fri, 06 Oct 2023 09:21:49 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [LLDB][NFC] Create a namespace for the DWARF plugin (PR #68150)
In-Reply-To: 
Message-ID: <6520341d.630a0220.fd6ac.b3c8@mx.google.com>

walter-erquinigo wrote:

@JDevlieghere PTAL

https://github.com/llvm/llvm-project/pull/68150

From lldb-commits at lists.llvm.org  Fri Oct  6 09:49:22 2023
From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits)
Date: Fri, 06 Oct 2023 09:49:22 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360)
In-Reply-To: 
Message-ID: <65203a92.170a0220.79ed8.d6cb@mx.google.com>

https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360

>From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 13:22:28 -0700
Subject: [PATCH 01/11] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader

This revision introduces a MapRef, which will support a future
generalization beyond permutations (e.g., block sparsity). This revision
also unifies the conversion/codegen paths for the sparse_tensor.new
operation from file (e.g., the readers). Note that more unification is
planned, as well as general affine dim2lvl and lvl2dim (all marked
with TODOs).
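In a nutshell: for a permutation, the dim2lvl side of such a map scatters
each dimension coordinate to its mapped level position, and lvl2dim gathers
it back. Below is a minimal standalone sketch of that pushforward idea,
assuming a pure permutation map; the free-standing pushforward helper and
the main() driver are illustrative only and not part of this patch, and the
affine case is omitted since it is still a TODO here.

#include <cassert>
#include <cstdint>
#include <vector>

// dim2lvl[d] is the level position that dimension d maps to.
static void pushforward(const std::vector<uint64_t> &dim2lvl,
                        const std::vector<uint64_t> &dimCoords,
                        std::vector<uint64_t> &lvlCoords) {
  assert(dim2lvl.size() == dimCoords.size());
  lvlCoords.assign(dim2lvl.size(), 0);
  for (uint64_t d = 0, e = dim2lvl.size(); d < e; ++d)
    lvlCoords[dim2lvl[d]] = dimCoords[d]; // scatter to mapped position
}

int main() {
  // A 2-d transpose: dimension 0 maps to level 1 and vice versa, so
  // dimension coordinate (3, 7) becomes level coordinate (7, 3).
  const std::vector<uint64_t> dim2lvl = {1, 0};
  std::vector<uint64_t> lvlCoords;
  pushforward(dim2lvl, {3, 7}, lvlCoords);
  assert(lvlCoords[0] == 7 && lvlCoords[1] == 3);
  return 0;
}

For an identity map both directions reduce to a plain copy, which is why
the MapRef in the diff below distinguishes MapKind::kIdentity and
MapKind::kPermutation and reserves the extra overhead for the (future)
kAffine case.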
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 02/11] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 03/11] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)
         out[i] = in[i];

>From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 04/11] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> lvl2dim;
 };
 
-
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-level sparse/dense annotations. This data structure provides
 /// a bufferized form of a sparse tensor type. In contrast to generating
@@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase {
 //===----------------------------------------------------------------------===//
 
 template <typename P, typename I, typename V>
-class SparseTensorEnumerator final
-    : public SparseTensorEnumeratorBase<V> {
+class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
   using Base = SparseTensorEnumeratorBase<V>;
   using StorageImpl = SparseTensorStorage<P, I, V>;

>From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 13:22:28 -0700
Subject: [PATCH 05/11] [mlir][sparse] introduce MapRef, unify
 conversion/codegen for reader

This revision introduces a MapRef, which will support a future
generalization beyond permutations (e.g. block sparsity). This revision
also unifies the conversion/codegen paths for the sparse_tensor.new
operation from file (e.g., the readers). Note that more unification is
planned as well as general affine dim2lvl and lvl2dim (all marked with
TODOs).
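To make the mapping semantics concrete before the diff itself, here is a
small standalone C++ sketch of the permutation case that MapRef handles.
It is illustrative only and not part of the patch: the rank, coordinate
values, and variable names below are made up, and it mirrors just the
kPermutation branches of pushforward/pushbackward.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// dim2lvl[d] gives the storage-level position of dimension d, and
// lvl2dim is its inverse permutation, so pushing coordinates forward
// and then backward must round-trip to the original values.
int main() {
  const uint64_t rank = 3;                         // hypothetical 3-D tensor
  const std::vector<uint64_t> dim2lvl = {2, 0, 1}; // dim d -> level dim2lvl[d]
  const std::vector<uint64_t> lvl2dim = {1, 2, 0}; // inverse of dim2lvl
  const std::vector<uint64_t> dimCoords = {10, 20, 30};
  std::vector<uint64_t> lvlCoords(rank), backAgain(rank);
  // Pushforward (dims -> lvls), as in MapRef::pushforward's kPermutation case.
  for (uint64_t d = 0; d < rank; d++)
    lvlCoords[dim2lvl[d]] = dimCoords[d];
  // Pushbackward (lvls -> dims), as in MapRef::pushbackward's kPermutation case.
  for (uint64_t l = 0; l < rank; l++)
    backAgain[lvl2dim[l]] = lvlCoords[l];
  assert(backAgain == dimCoords && "lvl2dim must invert dim2lvl");
  for (uint64_t l = 0; l < rank; l++)
    std::cout << "level " << l << " stores coordinate " << lvlCoords[l] << "\n";
  return 0;
}

This pushforward step is what lets the file readers below translate file
(dimension) coordinates into storage (level) coordinates, one element at
a time for the identity and permutation cases, and it is why the patch
now threads both a dim2lvl and an lvl2dim buffer through the runtime
entry points.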
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+  if (isIdentity()) {
+    kind = MapKind::kIdentity;
+    for (uint64_t i = 0; i < dimRank; i++)
+      assert(lvl2dim[i] == i);
+  } else if (isPermutation()) {
+    kind = MapKind::kPermutation;
+    for (uint64_t i = 0; i < dimRank; i++)
+      assert(lvl2dim[dim2lvl[i]] == i);
+  } else {
+    kind = MapKind::kAffine;
+  }
+}
+
+bool mlir::sparse_tensor::MapRef::isIdentity() const {
+  if (dimRank != lvlRank)
+    return false;
+  for (uint64_t i = 0; i < dimRank; i++) {
+    if (dim2lvl[i] != i)
+      return false;
+  }
+  return true;
+}
+
+bool mlir::sparse_tensor::MapRef::isPermutation() const {
+  if (dimRank != lvlRank)
+    return false;
+  std::vector seen(dimRank, false);
+  for (uint64_t i = 0; i < dimRank; i++) {
+    const uint64_t j = dim2lvl[i];
+    if (j >= dimRank || seen[j])
+      return false;
+    seen[j] = true;
+  }
+  return true;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 82cb6d3aeefa35f..5b910716c0f9e59 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -226,11 +226,7 @@ extern "C" {
 static_assert(std::is_same::value, "Expected index_type == uint64_t");
 
-// TODO: this swiss-army-knife should be split up into separate functions
-// for each action, since the various actions don't agree on (1) whether
-// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl"
-// arguments are actually storage-levels vs target tensor-dimensions,
-// (3) whether all the arguments are actually used/required.
+// The Swiss-army-knife for sparse tensor creation.
 void *_mlir_ciface_newSparseTensor( // NOLINT
     StridedMemRefType *dimSizesRef,
     StridedMemRefType *lvlSizesRef,
@@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT
   ASSERT_NO_STRIDE(dimSizesRef);
   ASSERT_NO_STRIDE(lvlSizesRef);
   ASSERT_NO_STRIDE(lvlTypesRef);
-  ASSERT_NO_STRIDE(lvl2dimRef);
   ASSERT_NO_STRIDE(dim2lvlRef);
+  ASSERT_NO_STRIDE(lvl2dimRef);
   const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef);
   const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef);
-  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
   ASSERT_USIZE_EQ(lvlTypesRef, lvlRank);
+  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
   ASSERT_USIZE_EQ(lvl2dimRef, lvlRank);
   const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef);
   const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
   const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
-  const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef);
   const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef);
+  const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef);
   // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases.
   // This is safe because of the static_assert above.
@@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES)
 #undef IMPL_SPARSECOORDINATES
 #undef IMPL_GETOVERHEAD
 
-// TODO: while this API design will work for arbitrary dim2lvl mappings,
-// we should probably move the `dimCoords`-to-`lvlCoords` computation into
-// codegen (since that could enable optimizations to remove the intermediate
-// memref).
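// For concreteness, a minimal sketch of the dimCoords-to-lvlCoords
// translation that the TODOs here refer to (hypothetical wiring, since
// the dim2lvl/lvl2dim arrays are not yet threaded into these functions):
//
//   MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
//   std::vector<index_type> lvlCoords(lvlRank);
//   map.pushforward(dimCoords, lvlCoords.data()); // dim -> lvl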
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 06/11] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 07/11] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
-  template inline void pushforward(const T *in, T *out) const {
+  template
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
-  template inline void pushbackward(const T *in, T *out) const {
+  template
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)

>From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 08/11] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector lvl2dim;
 };
 
-
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-level sparse/dense annotations. This data structure provides
 /// a bufferized form of a sparse tensor type.
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 19:55:25 -0700 Subject: [PATCH 09/11] ArrayRef --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ffb1a550957edb8..61fecdad3be9398 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc, Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapesValues, + ArrayRef dimShapesValues, Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 08ea019d8224a73..698b6c491a9aef7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, /// Generates code to set up the buffer parameters for a reader. 
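// (A note on the signature change below: taking a read-only sequence as
// ArrayRef<Value> rather than SmallVectorImpl<Value> & is the usual LLVM
// idiom; any contiguous container converts implicitly, e.g. a caller's
// SmallVector<Value> dimShapeValues still binds unchanged, while the
// callee no longer commits to one particular storage type.)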
Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// >From 3e13b908253c1873295fb263537eee3bd40f186e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 21:08:24 -0700 Subject: [PATCH 10/11] sort_coo -> sort --- mlir/test/Dialect/SparseTensor/codegen.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index adefceba7379f99..84904227a636327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -699,7 +699,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] // CHECK: } // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] >From e562d1ca2297ec907c719b089ce77ea7f91a28a3 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 09:48:36 -0700 Subject: [PATCH 11/11] changed header protos --- mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 861b7eff65115b6..f25df11d15fdad1 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -134,8 +134,8 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_createCheckedSparseTensorReader( MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp); /// SparseTensorReader method to obtain direct access to the @@ -149,7 +149,8 @@ MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( MLIR_CRUNNERUTILS_EXPORT bool \ _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ - StridedMemRefType *iref, StridedMemRefType *vref) \ + StridedMemRefType *lvl2dimRef, \ + StridedMemRefType *cref, StridedMemRefType *vref) \ MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) #undef DECL_GETNEXT From lldb-commits at lists.llvm.org Fri Oct 6 10:27:04 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Fri, 06 Oct 2023 10:27:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <65204368.170a0220.79ed8.daaa@mx.google.com> 
https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 01/12] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. 
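// Condensing the readSparseTensor change above into one place, the new
// reading pipeline is now simply (template arguments, which the archive
// stripped, restored here for readability):
//
//   const uint64_t dimRank = getRank();
//   MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); // encode the mapping
//   auto *coo = readCOO<V>(map, lvlSizes);          // file -> lvl-space COO
//   auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO(
//       dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo);
//   delete coo;
//   return tensor;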
- template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. 
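// The technique being described, as a self-contained sketch (simplified:
// a pattern file carries no explicit values, so a constant stands in for
// the parsed value, mirroring the role of detail::readValue):
//
//   template <bool IsPattern>
//   double readOne(char **p) {
//     if (IsPattern)          // compile-time constant; branch folds away
//       return 1.0;
//     return strtod(*p, p);   // otherwise parse the value text
//   }
//
//   template <bool IsPattern>
//   void readLoop(char **p, double *out, uint64_t nse) {
//     for (uint64_t k = 0; k < nse; k++) // no per-element pattern test
//       out[k] = readOne<IsPattern>(p);
//   }
//
//   // One runtime test up front selects the specialization:
//   isPattern ? readLoop<true>(p, out, nse) : readLoop<false>(p, out, nse);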
template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. +// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
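// For illustration: with dimRank = lvlRank = 2 and the transposing map
// dim2lvl = [1, 0], the constructor infers kind == MapKind::kPermutation,
// and the permutation case below writes out[dim2lvl[i]] = in[i], so the
// input coordinates (i, j) come out as (j, i).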
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
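// Spelled out, the avoided alternative would be along these lines
// (hypothetical, shown only for contrast):
//
//   SparseTensorEnumeratorBase<V> *enumerator = nullptr;
//   newEnumerator(&enumerator, trgRank, trgSizes, srcRank, src2trg);
//   ...                // every use now pays a virtual call
//   delete enumerator; // plus a heap allocation/deallocation
//
// whereas the concrete enumerator constructed on the stack below can be
// devirtualized and inlined.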
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
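+  // (For example, a static size of 8 must match the size stored in the file
+  // header exactly, while a dynamic size, encoded as zero, accepts whatever
+  // size the file provides.)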
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
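+/// For example, a `?x8` shape yields the index constants {0, 8}.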
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
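+    // (That is, the positions, coordinates, and values buffers, plus the
+    // storage specifier; see `createAllocFields` below.)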
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
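+  // For example, with dimRank == lvlRank == 2, dim2lvl = {0, 1} yields
+  // MapKind::kIdentity, dim2lvl = {1, 0} with lvl2dim = {1, 0} yields
+  // MapKind::kPermutation, and anything else falls back to MapKind::kAffine.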
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 02/12] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 03/12] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 04/12] clang=format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 05/12] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). 
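For concreteness, here is a minimal usage sketch (illustrative only; it relies
solely on the public MapRef API added by this revision) that translates
dim-coordinates to lvl-coordinates for a simple (d0, d1) -> (d1, d0)
permutation, and back:

  #include <cassert>
  #include <cstdint>

  #include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

  int main() {
    // Dimension d stores at level dim2lvl[d]; lvl2dim is the inverse map.
    const uint64_t dim2lvl[] = {1, 0};
    const uint64_t lvl2dim[] = {1, 0};
    mlir::sparse_tensor::MapRef map(/*dimRank=*/2, /*lvlRank=*/2, dim2lvl,
                                    lvl2dim);
    // Forward: dim-coordinates (as read from file) to lvl-coordinates.
    const uint64_t dimCoords[2] = {3, 7};
    uint64_t lvlCoords[2];
    map.pushforward(dimCoords, lvlCoords); // lvlCoords == {7, 3}
    // Backward: lvl-coordinates back to dim-coordinates.
    uint64_t backCoords[2];
    map.pushbackward(lvlCoords, backCoords); // backCoords == {3, 7}
    assert(backCoords[0] == 3 && backCoords[1] == 7);
    return 0;
  }

The map kind is inferred once at construction (identity, permutation, or, in
the future, affine), so the per-element coordinate translation in the readers
remains a simple loop.
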
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
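+  /// (Here `P` is the overhead type for positions, `I` the overhead type
+  /// for coordinates, and `V` the type of the stored values.)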
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+  Type opaqueTp = getOpaquePointerType(builder);
+  Type eltTp = stt.getElementType();
+  Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp);
+  Value reader =
+      createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp,
+                     {tensor, dimShapesBuffer, valTp}, EmitCInterface::On)
+          .getResult(0);
+  // For static shapes, the shape buffer can be used right away. For dynamic
+  // shapes, use the information from the reader to construct a buffer that
+  // supplies the actual size for each dynamic dimension.
+  dimSizesBuffer = dimShapesBuffer;
+  if (stt.hasDynamicDimShape()) {
+    Type indexTp = builder.getIndexType();
+    auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp);
+    dimSizesBuffer =
+        createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp,
+                       reader, EmitCInterface::On)
+            .getResult(0);
+  }
+  return reader;
+}
+
+Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
+                                      SparseTensorType stt,
+                                      SmallVectorImpl<Value> &dimShapesValues,
+                                      Value dimSizesBuffer,
+                                      /*out*/ Value &dim2lvlBuffer,
+                                      /*out*/ Value &lvl2dimBuffer) {
+  const Dimension dimRank = stt.getDimRank();
+  const Level lvlRank = stt.getLvlRank();
+  // For an identity mapping, the dim2lvl and lvl2dim mappings are
+  // identical, as are dimSizes and lvlSizes, so buffers are reused
+  // as much as possible.
+  if (stt.isIdentity()) {
+    assert(dimRank == lvlRank);
+    SmallVector<Value> iotaValues;
+    iotaValues.reserve(lvlRank);
+    for (Level l = 0; l < lvlRank; l++)
+      iotaValues.push_back(constantIndex(builder, loc, l));
+    dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues);
+    return dimSizesBuffer;
+  }
+  // Otherwise, some code needs to be generated to set up the buffers.
+  // TODO: use the lvl2dim once available and deal with non-permutations!
+  const auto dimToLvl = stt.getDimToLvl();
+  assert(dimToLvl.isPermutation());
+  SmallVector<Value> dim2lvlValues(dimRank);
+  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> lvlSizesValues(lvlRank);
+  for (Level l = 0; l < lvlRank; l++) {
+    // The `d`th source variable occurs in the `l`th result position.
+    Dimension d = dimToLvl.getDimPosition(l);
+    Value lvl = constantIndex(builder, loc, l);
+    Value dim = constantIndex(builder, loc, d);
+    dim2lvlValues[d] = lvl;
+    lvl2dimValues[l] = dim;
+    if (stt.isDynamicDim(d))
+      lvlSizesValues[l] =
+          builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
+    else
+      lvlSizesValues[l] = dimShapesValues[d];
+  }
+  dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
+  lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
+  return allocaBuffer(builder, loc, lvlSizesValues);
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 8145446751b9938..08ea019d8224a73 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SparseTensor/IR/Enums.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
 #include "mlir/IR/Builders.h"
 
@@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor,
 Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor,
                                 Dimension dim);
 
+/// Populates the array with the dimension-shape of the given
+/// `SparseTensorType`, where dynamic sizes are represented by zero.
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
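+  // (Editorial sketch, not part of the patch: for hypothetical inputs
+  //  d2l = {1, 0} and l2d = {1, 0}, isIdentity() fails on dim2lvl[0] != 0
+  //  but isPermutation() holds, so the constructor selects
+  //  MapKind::kPermutation and asserts lvl2dim[dim2lvl[i]] == i below.)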
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 06/12] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 07/12] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)

>From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 08/12] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> lvl2dim;
 };
 
-
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-level sparse/dense annotations. This data structure provides
 /// a bufferized form of a sparse tensor type. In contrast to generating
@@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase {
 //===----------------------------------------------------------------------===//
 
 template <typename P, typename C, typename V>
-class SparseTensorEnumerator final
-    : public SparseTensorEnumeratorBase<V> {
+class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
   using Base = SparseTensorEnumeratorBase<V>;
   using StorageImpl = SparseTensorStorage<P, C, V>;
 
>From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 19:55:25 -0700
Subject: [PATCH 09/12] ArrayRef

---
 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +-
 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index ffb1a550957edb8..61fecdad3be9398 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc,
 
 Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
                                       SparseTensorType stt,
-                                      SmallVectorImpl<Value> &dimShapesValues,
+                                      ArrayRef<Value> dimShapesValues,
                                       Value dimSizesBuffer,
                                       /*out*/ Value &dim2lvlBuffer,
                                       /*out*/ Value &lvl2dimBuffer) {
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 08ea019d8224a73..698b6c491a9aef7 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt,
 
 /// Generates code to set up the buffer parameters for a reader.
Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// >From 3e13b908253c1873295fb263537eee3bd40f186e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 21:08:24 -0700 Subject: [PATCH 10/12] sort_coo -> sort --- mlir/test/Dialect/SparseTensor/codegen.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index adefceba7379f99..84904227a636327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -699,7 +699,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] // CHECK: } // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] >From e562d1ca2297ec907c719b089ce77ea7f91a28a3 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 09:48:36 -0700 Subject: [PATCH 11/12] changed header protos --- mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 861b7eff65115b6..f25df11d15fdad1 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -134,8 +134,8 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_createCheckedSparseTensorReader( MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp); /// SparseTensorReader method to obtain direct access to the @@ -149,7 +149,8 @@ MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( MLIR_CRUNNERUTILS_EXPORT bool \ _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ - StridedMemRefType *iref, StridedMemRefType *vref) \ + StridedMemRefType *lvl2dimRef, \ + StridedMemRefType *cref, StridedMemRefType *vref) \ MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) #undef DECL_GETNEXT >From 0dda2e88c0760a29a099897b1a0751513f510959 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:24:59 -0700 Subject: [PATCH 12/12] simpler MapRef --- .../ExecutionEngine/SparseTensor/MapRef.h | 53 ++++++------------- .../ExecutionEngine/SparseTensor/MapRef.cpp | 28 ++-------- 2 files changed, 21 insertions(+), 60 deletions(-) diff --git 
a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index a1bd6798f150b43..e63412498a1abb8 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -16,6 +16,7 @@ #include #include +#include namespace mlir { namespace sparse_tensor { @@ -23,12 +24,11 @@ namespace sparse_tensor { /// A class for capturing the sparse tensor type map with a compact encoding. /// /// Currently, the following situations are supported: -/// (1) map is an identity -/// (2) map is a permutation -/// (3) map has affine ops (restricted set) +/// (1) map is a permutation +/// (2) map has affine ops (restricted set) /// -/// The pushforward/backward operations are fast for (1) and (2) but -/// incur some obvious overhead for situation (3). +/// The pushforward/backward operations are fast for (1) but incur some obvious +/// overhead for situation (2). /// class MapRef final { public: @@ -38,20 +38,12 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template - inline void pushforward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < dimRank; ++i) - out[i] = in[i]; // TODO: optimize with in == out ? - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < dimRank; ++i) - out[dim2lvl[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushforward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[lvl2dim[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -59,20 +51,12 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template - inline void pushbackward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < lvlRank; ++i) - out[i] = in[i]; - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < lvlRank; ++i) - out[lvl2dim[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushbackward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[dim2lvl[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -80,16 +64,13 @@ class MapRef final { uint64_t getLvlRank() const { return lvlRank; } private: - enum class MapKind { kIdentity, kPermutation, kAffine }; - - bool isIdentity() const; - bool isPermutation() const; + bool isPermutationMap() const; - MapKind kind; const uint64_t dimRank; const uint64_t lvlRank; const uint64_t *const dim2lvl; // non-owning pointer const uint64_t *const lvl2dim; // non-owning pointer + const bool isPermutation; }; } // namespace sparse_tensor diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp index ed458afeae746bc..ee4d6fa0d34b491 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -6,39 +6,19 @@ // //===----------------------------------------------------------------------===// -#include - #include "mlir/ExecutionEngine/SparseTensor/MapRef.h" mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d) - : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { - assert(d2l && l2d); - // Determine the kind of mapping (and asserts on simple inference). 
-  if (isIdentity()) {
-    kind = MapKind::kIdentity;
-    for (uint64_t i = 0; i < dimRank; i++)
-      assert(lvl2dim[i] == i);
-  } else if (isPermutation()) {
-    kind = MapKind::kPermutation;
+    : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d),
+      isPermutation(isPermutationMap()) {
+  if (isPermutation) {
     for (uint64_t i = 0; i < dimRank; i++)
       assert(lvl2dim[dim2lvl[i]] == i);
-  } else {
-    kind = MapKind::kAffine;
-  }
-}
-
-bool mlir::sparse_tensor::MapRef::isIdentity() const {
-  if (dimRank != lvlRank)
-    return false;
-  for (uint64_t i = 0; i < dimRank; i++) {
-    if (dim2lvl[i] != i)
-      return false;
   }
-  return true;
 }
 
-bool mlir::sparse_tensor::MapRef::isPermutation() const {
+bool mlir::sparse_tensor::MapRef::isPermutationMap() const {
   if (dimRank != lvlRank)
     return false;
   std::vector<bool> seen(dimRank, false);

From lldb-commits at lists.llvm.org Fri Oct 6 10:30:47 2023
From: lldb-commits at lists.llvm.org (Peiming Liu via lldb-commits)
Date: Fri, 06 Oct 2023 10:30:47 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360)
In-Reply-To: 
Message-ID: <65204447.a70a0220.2dde.51a3@mx.google.com>

https://github.com/PeimingLiu approved this pull request.

https://github.com/llvm/llvm-project/pull/68360

From lldb-commits at lists.llvm.org Fri Oct 6 10:43:35 2023
From: lldb-commits at lists.llvm.org (via lldb-commits)
Date: Fri, 06 Oct 2023 10:43:35 -0700 (PDT)
Subject: [Lldb-commits] [lldb] 8f378ff - [lldb] Expose SBPlatform::GetAllProcesses to the SB API (#68378)
Message-ID: <65204747.050a0220.801e2.578a@mx.google.com>

Author: Jonas Devlieghere
Date: 2023-10-06T10:43:31-07:00
New Revision: 8f378ff7a0a36137619a446b0bb13b8bc0ef6721

URL: https://github.com/llvm/llvm-project/commit/8f378ff7a0a36137619a446b0bb13b8bc0ef6721
DIFF: https://github.com/llvm/llvm-project/commit/8f378ff7a0a36137619a446b0bb13b8bc0ef6721.diff

LOG: [lldb] Expose SBPlatform::GetAllProcesses to the SB API (#68378)

Add the ability to list all processes through the SB API.
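For illustration, a minimal C++ sketch of how a client drives the new call (the setup is assumed: the debugger is initialized and `platform` is already connected, e.g. via ConnectRemote; GetAllProcesses, GetSize, and GetProcessInfoAtIndex are the entry points this patch adds):

#include "lldb/API/LLDB.h"
#include <cinttypes>
#include <cstdio>

// Sketch: enumerate every process visible to a connected platform.
static void ListProcesses(lldb::SBPlatform &platform) {
  lldb::SBError error;
  lldb::SBProcessInfoList processes = platform.GetAllProcesses(error);
  if (error.Fail())
    return; // "not connected" or "invalid platform"
  for (uint32_t i = 0; i < processes.GetSize(); ++i) {
    lldb::SBProcessInfo info;
    if (processes.GetProcessInfoAtIndex(i, info))
      std::printf("%" PRIu64 " %s\n", info.GetProcessID(), info.GetName());
  }
}

The same flow is exercised end-to-end, via the Python bindings, by the new TestPlatformListProcesses.py below.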
rdar://116188959 Added: lldb/bindings/interface/SBProcessInfoListExtensions.i lldb/include/lldb/API/SBProcessInfoList.h lldb/source/API/SBProcessInfoList.cpp lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py Modified: lldb/bindings/headers.swig lldb/bindings/interfaces.swig lldb/include/lldb/API/LLDB.h lldb/include/lldb/API/SBDefines.h lldb/include/lldb/API/SBPlatform.h lldb/include/lldb/API/SBProcessInfo.h lldb/include/lldb/Target/Platform.h lldb/include/lldb/Utility/ProcessInfo.h lldb/source/API/CMakeLists.txt lldb/source/API/SBPlatform.cpp lldb/source/Target/Platform.cpp Removed: ################################################################################ diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index d392ed43d8c0c9e..b1d88726f754354 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -46,6 +46,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/bindings/interface/SBProcessInfoListExtensions.i b/lldb/bindings/interface/SBProcessInfoListExtensions.i new file mode 100644 index 000000000000000..42999846ef6a52f --- /dev/null +++ b/lldb/bindings/interface/SBProcessInfoListExtensions.i @@ -0,0 +1,13 @@ +%extend lldb::SBProcessInfoList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of process info in a lldb.SBProcessInfoListExtensions object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the process info in a lldb.SBProcessInfoListExtensions object.''' + return lldb_iter(self, 'GetSize', 'GetProcessInfoAtIndex') + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 306cfe683893271..373c2f6cf545cfb 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -122,6 +122,7 @@ %include "lldb/API/SBPlatform.h" %include "lldb/API/SBProcess.h" %include "lldb/API/SBProcessInfo.h" +%include "lldb/API/SBProcessInfoList.h" %include "lldb/API/SBQueue.h" %include "lldb/API/SBQueueItem.h" %include "lldb/API/SBReproducer.h" @@ -184,6 +185,7 @@ %include "./interface/SBModuleSpecExtensions.i" %include "./interface/SBModuleSpecListExtensions.i" %include "./interface/SBProcessExtensions.i" +%include "./interface/SBProcessInfoListExtensions.i" %include "./interface/SBQueueItemExtensions.i" %include "./interface/SBScriptObjectExtensions.i" %include "./interface/SBSectionExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index eacbbeafcf1cd86..f652d1bdb835b59 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -49,6 +49,7 @@ #include "lldb/API/SBPlatform.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index ec5e940fdaf36fc..c6f01cc03f263c8 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -90,6 +90,7 @@ class LLDB_API SBPlatformConnectOptions; class LLDB_API SBPlatformShellCommand; class LLDB_API SBProcess; class LLDB_API SBProcessInfo; +class LLDB_API SBProcessInfoList; class LLDB_API SBQueue; class LLDB_API SBQueueItem; class LLDB_API SBReplayOptions; diff 
--git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index e0acc7003a54bc3..d63d2ed1eaba627 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -11,11 +11,13 @@ #include "lldb/API/SBDefines.h" #include "lldb/API/SBProcess.h" +#include "lldb/API/SBProcessInfoList.h" #include struct PlatformConnectOptions; struct PlatformShellCommand; +class ProcessInstanceInfoMatch; namespace lldb { @@ -154,6 +156,8 @@ class LLDB_API SBPlatform { SBProcess Attach(SBAttachInfo &attach_info, const SBDebugger &debugger, SBTarget &target, SBError &error); + SBProcessInfoList GetAllProcesses(SBError &error); + SBError Kill(const lldb::pid_t pid); SBError diff --git a/lldb/include/lldb/API/SBProcessInfo.h b/lldb/include/lldb/API/SBProcessInfo.h index 36fae9e842a6136..aec5924e4704a49 100644 --- a/lldb/include/lldb/API/SBProcessInfo.h +++ b/lldb/include/lldb/API/SBProcessInfo.h @@ -55,6 +55,7 @@ class LLDB_API SBProcessInfo { private: friend class SBProcess; + friend class SBProcessInfoList; lldb_private::ProcessInstanceInfo &ref(); diff --git a/lldb/include/lldb/API/SBProcessInfoList.h b/lldb/include/lldb/API/SBProcessInfoList.h new file mode 100644 index 000000000000000..9d3f65c46fbbe69 --- /dev/null +++ b/lldb/include/lldb/API/SBProcessInfoList.h @@ -0,0 +1,46 @@ +//===-- SBProcessInfoList.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBPROCESSINSTANCEINFOLIST_H +#define LLDB_API_SBPROCESSINSTANCEINFOLIST_H + +#include "lldb/API/SBDefines.h" + +#include + +namespace lldb_private { +class ProcessInfoList; +} // namespace lldb_private + +namespace lldb { + +class LLDB_API SBProcessInfoList { +public: + SBProcessInfoList(); + ~SBProcessInfoList(); + + SBProcessInfoList(const lldb::SBProcessInfoList &rhs); + + const lldb::SBProcessInfoList &operator=(const lldb::SBProcessInfoList &rhs); + + uint32_t GetSize() const; + + bool GetProcessInfoAtIndex(uint32_t idx, SBProcessInfo &info); + + void Clear(); + +private: + friend SBPlatform; + + SBProcessInfoList(const lldb_private::ProcessInfoList &impl); + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBPROCESSINSTANCEINFOLIST_H diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 08a58c80ef84779..129e4565d9ff993 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -407,6 +407,8 @@ class Platform : public PluginInterface { virtual uint32_t FindProcesses(const ProcessInstanceInfoMatch &match_info, ProcessInstanceInfoList &proc_infos); + ProcessInstanceInfoList GetAllProcesses(); + virtual bool GetProcessInfo(lldb::pid_t pid, ProcessInstanceInfo &proc_info); // Set a breakpoint on all functions that can end up creating a thread for @@ -883,7 +885,7 @@ class Platform : public PluginInterface { } virtual CompilerType GetSiginfoType(const llvm::Triple &triple); - + virtual Args GetExtraStartupCommands(); typedef std::function ProcessInstanceInfoList; +class ProcessInfoList { +public: + ProcessInfoList(const ProcessInstanceInfoList &list) : m_list(list) {} + + uint32_t GetSize() const { return m_list.size(); } + + bool GetProcessInfoAtIndex(uint32_t idx, ProcessInstanceInfo &info) { + 
if (idx < m_list.size()) { + info = m_list[idx]; + return true; + } + return false; + } + + void Clear() { return m_list.clear(); } + +private: + ProcessInstanceInfoList m_list; +}; + // ProcessInstanceInfoMatch // // A class to help matching one ProcessInstanceInfo to another. diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index a574a461d4920ae..895c6221a8073cf 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -61,6 +61,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBPlatform.cpp SBProcess.cpp SBProcessInfo.cpp + SBProcessInfoList.cpp SBQueue.cpp SBQueueItem.cpp SBReproducer.cpp diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index c31848fe04ea72c..3623fd35bcdf13f 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -14,6 +14,7 @@ #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBModuleSpec.h" #include "lldb/API/SBPlatform.h" +#include "lldb/API/SBProcessInfoList.h" #include "lldb/API/SBTarget.h" #include "lldb/API/SBUnixSignals.h" #include "lldb/Host/File.h" @@ -599,6 +600,20 @@ SBProcess SBPlatform::Attach(SBAttachInfo &attach_info, return {}; } +SBProcessInfoList SBPlatform::GetAllProcesses(SBError &error) { + if (PlatformSP platform_sp = GetSP()) { + if (platform_sp->IsConnected()) { + ProcessInstanceInfoList list = platform_sp->GetAllProcesses(); + return SBProcessInfoList(list); + } + error.SetErrorString("not connected"); + return {}; + } + + error.SetErrorString("invalid platform"); + return {}; +} + SBError SBPlatform::Kill(const lldb::pid_t pid) { LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { diff --git a/lldb/source/API/SBProcessInfoList.cpp b/lldb/source/API/SBProcessInfoList.cpp new file mode 100644 index 000000000000000..a711bcb58301e61 --- /dev/null +++ b/lldb/source/API/SBProcessInfoList.cpp @@ -0,0 +1,74 @@ +//===-- SBProcessInfoList.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBProcessInfoList.h" +#include "lldb/API/SBProcessInfo.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/ProcessInfo.h" + +#include "Utils.h" + +using namespace lldb; +using namespace lldb_private; + +SBProcessInfoList::SBProcessInfoList() = default; + +SBProcessInfoList::~SBProcessInfoList() = default; + +SBProcessInfoList::SBProcessInfoList(const ProcessInfoList &impl) + : m_opaque_up(std::make_unique(impl)) { + LLDB_INSTRUMENT_VA(this, impl); +} + +SBProcessInfoList::SBProcessInfoList(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +const lldb::SBProcessInfoList & +SBProcessInfoList::operator=(const lldb::SBProcessInfoList &rhs) { + + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +uint32_t SBProcessInfoList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + return m_opaque_up->GetSize(); + + return 0; +} + +void SBProcessInfoList::Clear() { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_up) + m_opaque_up->Clear(); +} + +bool SBProcessInfoList::GetProcessInfoAtIndex(uint32_t idx, + SBProcessInfo &info) { + LLDB_INSTRUMENT_VA(this, idx, info); + + if (m_opaque_up) { + lldb_private::ProcessInstanceInfo process_instance_info; + if (m_opaque_up->GetProcessInfoAtIndex(idx, process_instance_info)) { + info.SetProcessInfo(process_instance_info); + return true; + } + } + + return false; +} diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c117339f07cc9df..c345e33136070f2 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -989,6 +989,14 @@ uint32_t Platform::FindProcesses(const ProcessInstanceInfoMatch &match_info, return match_count; } +ProcessInstanceInfoList Platform::GetAllProcesses() { + ProcessInstanceInfoList processes; + ProcessInstanceInfoMatch match; + assert(match.MatchAllProcesses()); + FindProcesses(match, processes); + return processes; +} + Status Platform::LaunchProcess(ProcessLaunchInfo &launch_info) { Status error; Log *log = GetLog(LLDBLog::Platform); diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py new file mode 100644 index 000000000000000..be0e3f5f8c50112 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPlatformListProcesses.py @@ -0,0 +1,54 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class TestPlatformListProcesses(GDBRemoteTestBase): + @skipIfRemote + @skipIfWindows + def test_get_all_processes(self): + """Test listing processes""" + + class MyPlatformResponder(MockGDBServerResponder): + def __init__(self): + MockGDBServerResponder.__init__(self) + self.done = False + + def qfProcessInfo(self, packet): + return "pid:95117;name:666f6f;" + + def qsProcessInfo(self): + if not self.done: + self.done = True + return "pid:95126;name:666f6f;" + return "E10" + + self.server.responder = MyPlatformResponder() + + error = lldb.SBError() + platform = lldb.SBPlatform("remote-linux") + self.dbg.SetSelectedPlatform(platform) + + error = platform.ConnectRemote( + 
lldb.SBPlatformConnectOptions(self.server.get_connect_url()) + ) + self.assertSuccess(error) + self.assertTrue(platform.IsConnected()) + + processes = platform.GetAllProcesses(error) + self.assertSuccess(error) + self.assertEqual(processes.GetSize(), 2) + self.assertEqual(len(processes), 2) + + process_info = lldb.SBProcessInfo() + processes.GetProcessInfoAtIndex(0, process_info) + self.assertEqual(process_info.GetProcessID(), 95117) + self.assertEqual(process_info.GetName(), "foo") + + processes.GetProcessInfoAtIndex(1, process_info) + self.assertEqual(process_info.GetProcessID(), 95126) + self.assertEqual(process_info.GetName(), "foo") + + platform.DisconnectRemote() From lldb-commits at lists.llvm.org Fri Oct 6 10:43:37 2023 From: lldb-commits at lists.llvm.org (Jonas Devlieghere via lldb-commits) Date: Fri, 06 Oct 2023 10:43:37 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Expose SBPlatform::GetAllProcesses to the SB API (PR #68378) In-Reply-To: Message-ID: <65204749.a70a0220.4e52e.553c@mx.google.com> https://github.com/JDevlieghere closed https://github.com/llvm/llvm-project/pull/68378 From lldb-commits at lists.llvm.org Fri Oct 6 10:49:17 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Fri, 06 Oct 2023 10:49:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <6520489d.170a0220.987b0.d626@mx.google.com> https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 01/13] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). 
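To make the dim2lvl/lvl2dim vocabulary concrete before the diff, here is a self-contained sketch of the permutation case that MapRef encodes (an illustration under the permutation assumption only, not the class itself, which also reserves a kind for the upcoming affine maps):

#include <cassert>
#include <cstdint>

// Sketch: push dimension coordinates forward to level coordinates under
// a permutation map. Level l draws from dimension lvl2dim[l], and the
// inverse relation lvl2dim[dim2lvl[d]] == d must hold.
inline void pushforward(uint64_t rank, const uint64_t *lvl2dim,
                        const uint64_t *dimCoords, uint64_t *lvlCoords) {
  for (uint64_t l = 0; l < rank; ++l)
    lvlCoords[l] = dimCoords[lvl2dim[l]];
}

int main() {
  // Transpose map (d0, d1) -> (d1, d0): dim2lvl = lvl2dim = {1, 0}.
  const uint64_t lvl2dim[2] = {1, 0};
  const uint64_t dimCoords[2] = {3, 7};
  uint64_t lvlCoords[2];
  pushforward(2, lvl2dim, dimCoords, lvlCoords);
  assert(lvlCoords[0] == 7 && lvlCoords[1] == 3);
  return 0;
}

The pushbackward direction simply swaps the roles of the two coordinate arrays and indexes through dim2lvl instead.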
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
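+ // For example, a static 8x8 shape yields dimShapesValues = {8, 8}, which + // the file must match exactly, whereas a dynamic ?x8 shape yields {0, 8}, + // where the zero lets the reader accept any size for dimension 0. (This + // worked example is an illustrative comment, not part of the original patch.)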
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl<Value> &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identity mapping, the dim2lvl and lvl2dim mappings are + // identical, as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector<Value> iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector<Value> dim2lvlValues(dimRank); + SmallVector<Value> lvl2dimValues(lvlRank); + SmallVector<Value> lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero.
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
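+ // For example, dim2lvl = lvl2dim = {0, 1} infers MapKind::kIdentity and + // dim2lvl = lvl2dim = {1, 0} infers MapKind::kPermutation; anything else + // (e.g. a future block-sparse map) falls back to MapKind::kAffine, whose + // push maps are still stubbed out ("coming soon"). (Illustrative comment, + // not part of the original patch.)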
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 02/13] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 03/13] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template <typename T> inline void pushforward(const T *in, T *out) const { + template <typename T> + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template <typename T> inline void pushbackward(const T *in, T *out) const { + template <typename T> + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 04/13] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector<uint64_t> lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template <typename P, typename C, typename V> -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase<V> { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> { using Base = SparseTensorEnumeratorBase<V>; using StorageImpl = SparseTensorStorage<P, C, V>; >From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 05/13] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (e.g. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs).
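As a quick illustration of the new API, here is a minimal standalone sketch (not part of this patch; the main() driver and the {1, 0} permutation are invented for the example, and only the MapRef interface shown above is used):

#include <cassert>
#include <cstdint>

#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

int main() {
  // A 2-d transpose: dimension i corresponds to level 1 - i, and vice versa.
  const uint64_t d2l[2] = {1, 0};
  const uint64_t l2d[2] = {1, 0};
  mlir::sparse_tensor::MapRef map(/*dimRank=*/2, /*lvlRank=*/2, d2l, l2d);
  uint64_t dimCoords[2] = {3, 7};
  uint64_t lvlCoords[2];
  map.pushforward(dimCoords, lvlCoords);    // lvlCoords == {7, 3}
  uint64_t backCoords[2];
  map.pushbackward(lvlCoords, backCoords);  // backCoords == {3, 7} again
  assert(backCoords[0] == 3 && backCoords[1] == 7);
  return 0;
}

Here the constructor infers MapKind::kPermutation from the two arrays, so both translations reduce to permuted copies of the coordinates.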
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+  Type opaqueTp = getOpaquePointerType(builder);
+  Type eltTp = stt.getElementType();
+  Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp);
+  Value reader =
+      createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp,
+                     {tensor, dimShapesBuffer, valTp}, EmitCInterface::On)
+          .getResult(0);
+  // For static shapes, the shape buffer can be used right away. For dynamic
+  // shapes, use the information from the reader to construct a buffer that
+  // supplies the actual size for each dynamic dimension.
+  dimSizesBuffer = dimShapesBuffer;
+  if (stt.hasDynamicDimShape()) {
+    Type indexTp = builder.getIndexType();
+    auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp);
+    dimSizesBuffer =
+        createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp,
+                       reader, EmitCInterface::On)
+            .getResult(0);
+  }
+  return reader;
+}
+
+Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
+                                      SparseTensorType stt,
+                                      SmallVectorImpl &dimShapesValues,
+                                      Value dimSizesBuffer,
+                                      /*out*/ Value &dim2lvlBuffer,
+                                      /*out*/ Value &lvl2dimBuffer) {
+  const Dimension dimRank = stt.getDimRank();
+  const Level lvlRank = stt.getLvlRank();
+  // For an identity mapping, the dim2lvl and lvl2dim mappings are
+  // identical, as are dimSizes and lvlSizes, so buffers are reused
+  // as much as possible.
+  if (stt.isIdentity()) {
+    assert(dimRank == lvlRank);
+    SmallVector iotaValues;
+    iotaValues.reserve(lvlRank);
+    for (Level l = 0; l < lvlRank; l++)
+      iotaValues.push_back(constantIndex(builder, loc, l));
+    dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues);
+    return dimSizesBuffer;
+  }
+  // Otherwise, some code needs to be generated to set up the buffers.
+  // TODO: use the lvl2dim once available and deal with non-permutations!
+  const auto dimToLvl = stt.getDimToLvl();
+  assert(dimToLvl.isPermutation());
+  SmallVector dim2lvlValues(dimRank);
+  SmallVector lvl2dimValues(lvlRank);
+  SmallVector lvlSizesValues(lvlRank);
+  for (Level l = 0; l < lvlRank; l++) {
+    // The `d`th source variable occurs in the `l`th result position.
+    Dimension d = dimToLvl.getDimPosition(l);
+    Value lvl = constantIndex(builder, loc, l);
+    Value dim = constantIndex(builder, loc, d);
+    dim2lvlValues[d] = lvl;
+    lvl2dimValues[l] = dim;
+    if (stt.isDynamicDim(d))
+      lvlSizesValues[l] =
+          builder.create(loc, dimSizesBuffer, dim);
+    else
+      lvlSizesValues[l] = dimShapesValues[d];
+  }
+  dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
+  lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
+  return allocaBuffer(builder, loc, lvlSizesValues);
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 8145446751b9938..08ea019d8224a73 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SparseTensor/IR/Enums.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
 #include "mlir/IR/Builders.h"
@@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor,
 Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor,
                                 Dimension dim);
 
+/// Populates the array with the dimension-shape of the given
+/// `SparseTensorType`, where dynamic sizes are represented by zero.
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 06/13] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 07/13] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)

>From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 08/13] clang=format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> lvl2dim;
 };
 
-
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-level sparse/dense annotations. This data structure provides
 /// a bufferized form of a sparse tensor type. In contrast to generating
@@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase {
 //===----------------------------------------------------------------------===//
 
 template <typename P, typename I, typename V>
-class SparseTensorEnumerator final
-    : public SparseTensorEnumeratorBase<V> {
+class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
   using Base = SparseTensorEnumeratorBase<V>;
   using StorageImpl = SparseTensorStorage<P, I, V>;
 
>From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 19:55:25 -0700
Subject: [PATCH 09/13] ArrayRef

---
 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +-
 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index ffb1a550957edb8..61fecdad3be9398 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc,
 
 Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
                                       SparseTensorType stt,
-                                      SmallVectorImpl<Value> &dimShapesValues,
+                                      ArrayRef<Value> dimShapesValues,
                                       Value dimSizesBuffer,
                                       /*out*/ Value &dim2lvlBuffer,
                                       /*out*/ Value &lvl2dimBuffer) {
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 08ea019d8224a73..698b6c491a9aef7 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt,
 
 /// Generates code to set up the buffer parameters for a reader.
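A quick note on the `ArrayRef` commit above, before the declaration continues: the usual LLVM guideline is that a function which only reads an array should take `llvm::ArrayRef<T>`, a cheap non-owning view, rather than `SmallVectorImpl<T> &`, which over-constrains callers and implies mutation. A minimal self-contained sketch of the difference; the `sum` helper is made up for illustration and is not part of the patch:

```cpp
// ArrayRef is a non-owning view: SmallVector, std::vector, and initializer
// lists all convert to it implicitly, and the signature promises no resizing.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

static int sum(llvm::ArrayRef<int> xs) { // read-only view of the input
  int total = 0;
  for (int x : xs)
    total += x;
  return total;
}

int main() {
  llvm::SmallVector<int, 4> a = {1, 2, 3};
  std::vector<int> b = {4, 5};
  // All three calls go through the same read-only view type.
  return sum(a) + sum(b) + sum({6, 7}) == 28 ? 0 : 1;
}
```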
Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// >From 3e13b908253c1873295fb263537eee3bd40f186e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 21:08:24 -0700 Subject: [PATCH 10/13] sort_coo -> sort --- mlir/test/Dialect/SparseTensor/codegen.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index adefceba7379f99..84904227a636327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -699,7 +699,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] // CHECK: } // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] >From e562d1ca2297ec907c719b089ce77ea7f91a28a3 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 09:48:36 -0700 Subject: [PATCH 11/13] changed header protos --- mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 861b7eff65115b6..f25df11d15fdad1 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -134,8 +134,8 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_createCheckedSparseTensorReader( MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp); /// SparseTensorReader method to obtain direct access to the @@ -149,7 +149,8 @@ MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( MLIR_CRUNNERUTILS_EXPORT bool \ _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ - StridedMemRefType *iref, StridedMemRefType *vref) \ + StridedMemRefType *lvl2dimRef, \ + StridedMemRefType *cref, StridedMemRefType *vref) \ MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) #undef DECL_GETNEXT >From 0dda2e88c0760a29a099897b1a0751513f510959 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:24:59 -0700 Subject: [PATCH 12/13] simpler MapRef --- .../ExecutionEngine/SparseTensor/MapRef.h | 53 ++++++------------- .../ExecutionEngine/SparseTensor/MapRef.cpp | 28 ++-------- 2 files changed, 21 insertions(+), 60 deletions(-) diff --git 
a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index a1bd6798f150b43..e63412498a1abb8 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -16,6 +16,7 @@ #include #include +#include namespace mlir { namespace sparse_tensor { @@ -23,12 +24,11 @@ namespace sparse_tensor { /// A class for capturing the sparse tensor type map with a compact encoding. /// /// Currently, the following situations are supported: -/// (1) map is an identity -/// (2) map is a permutation -/// (3) map has affine ops (restricted set) +/// (1) map is a permutation +/// (2) map has affine ops (restricted set) /// -/// The pushforward/backward operations are fast for (1) and (2) but -/// incur some obvious overhead for situation (3). +/// The pushforward/backward operations are fast for (1) but incur some obvious +/// overhead for situation (2). /// class MapRef final { public: @@ -38,20 +38,12 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template - inline void pushforward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < dimRank; ++i) - out[i] = in[i]; // TODO: optimize with in == out ? - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < dimRank; ++i) - out[dim2lvl[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushforward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[lvl2dim[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -59,20 +51,12 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template - inline void pushbackward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < lvlRank; ++i) - out[i] = in[i]; - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < lvlRank; ++i) - out[lvl2dim[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushbackward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[dim2lvl[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -80,16 +64,13 @@ class MapRef final { uint64_t getLvlRank() const { return lvlRank; } private: - enum class MapKind { kIdentity, kPermutation, kAffine }; - - bool isIdentity() const; - bool isPermutation() const; + bool isPermutationMap() const; - MapKind kind; const uint64_t dimRank; const uint64_t lvlRank; const uint64_t *const dim2lvl; // non-owning pointer const uint64_t *const lvl2dim; // non-owning pointer + const bool isPermutation; }; } // namespace sparse_tensor diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp index ed458afeae746bc..ee4d6fa0d34b491 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -6,39 +6,19 @@ // //===----------------------------------------------------------------------===// -#include - #include "mlir/ExecutionEngine/SparseTensor/MapRef.h" mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d) - : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { - assert(d2l && l2d); - // Determine the kind of mapping (and asserts on simple inference). 
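For reference while reading this deletion: both the old three-way classification being removed here and the new `isPermutationMap()` rest on the same O(n) seen-vector test, restated standalone below (a sketch, not the patch code):

```cpp
// A map d2l over [0..n) is a permutation iff every target index in [0..n)
// is hit exactly once. This mirrors the seen-vector technique in MapRef.cpp.
#include <cassert>
#include <cstdint>
#include <vector>

static bool isPermutation(uint64_t n, const uint64_t *d2l) {
  std::vector<bool> seen(n, false);
  for (uint64_t i = 0; i < n; i++) {
    const uint64_t j = d2l[i];
    if (j >= n || seen[j])
      return false; // out of range or duplicate target
    seen[j] = true;
  }
  return true; // n in-range hits with no duplicates covers [0..n)
}

int main() {
  const uint64_t perm[3] = {2, 0, 1};
  const uint64_t notPerm[3] = {2, 2, 1};
  assert(isPermutation(3, perm) && !isPermutation(3, notPerm));
  return 0;
}
```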
- if (isIdentity()) { - kind = MapKind::kIdentity; - for (uint64_t i = 0; i < dimRank; i++) - assert(lvl2dim[i] == i); - } else if (isPermutation()) { - kind = MapKind::kPermutation; + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d), + isPermutation(isPermutationMap()) { + if (isPermutation) { for (uint64_t i = 0; i < dimRank; i++) assert(lvl2dim[dim2lvl[i]] == i); - } else { - kind = MapKind::kAffine; - } -} - -bool mlir::sparse_tensor::MapRef::isIdentity() const { - if (dimRank != lvlRank) - return false; - for (uint64_t i = 0; i < dimRank; i++) { - if (dim2lvl[i] != i) - return false; } - return true; } -bool mlir::sparse_tensor::MapRef::isPermutation() const { +bool mlir::sparse_tensor::MapRef::isPermutationMap() const { if (dimRank != lvlRank) return false; std::vector seen(dimRank, false); >From ea50b3820f0a7817b7add513d6b13292a1768620 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:46:18 -0700 Subject: [PATCH 13/13] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index e63412498a1abb8..22ae70a61d95eff 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { if (isPermutation) { for (uint64_t i = 0; i < lvlRank; ++i) out[i] = in[lvl2dim[i]]; @@ -51,7 +52,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { if (isPermutation) { for (uint64_t i = 0; i < dimRank; ++i) out[i] = in[dim2lvl[i]]; From lldb-commits at lists.llvm.org Fri Oct 6 10:56:14 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 10:56:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] Add `target modules dump separate-debug-info` (PR #66035) In-Reply-To: Message-ID: <65204a3e.170a0220.980f8.d79e@mx.google.com> ================ @@ -1462,6 +1464,87 @@ static bool DumpModuleSymbolFile(Stream &strm, Module *module) { return false; } +static bool GetSeparateDebugInfoList(StructuredData::Array &list, + Module *module) { + if (module) { + if (SymbolFile *symbol_file = module->GetSymbolFile(true)) { ---------------- jeffreytan81 wrote: Merge into one line to reduce indention? 
``` if (module && SymbolFile *symbol_file = module->GetSymbolFile(true)) { } ``` https://github.com/llvm/llvm-project/pull/66035 From lldb-commits at lists.llvm.org Fri Oct 6 10:57:32 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Fri, 06 Oct 2023 10:57:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <65204a8c.630a0220.da284.9991@mx.google.com> ================ @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. + Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + ArrayRef dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are ---------------- yinying-lisa-li wrote: identity? https://github.com/llvm/llvm-project/pull/68360 From lldb-commits at lists.llvm.org Fri Oct 6 11:01:59 2023 From: lldb-commits at lists.llvm.org (Yinying Li via lldb-commits) Date: Fri, 06 Oct 2023 11:01:59 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <65204b97.170a0220.444af.e480@mx.google.com> https://github.com/yinying-lisa-li approved this pull request. 
https://github.com/llvm/llvm-project/pull/68360 From lldb-commits at lists.llvm.org Fri Oct 6 11:15:19 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Fri, 06 Oct 2023 11:15:19 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <65204eb7.630a0220.9be1f.b32d@mx.google.com> https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 01/14] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (eg. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs). --- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. 
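For context on the `readValue` helper visible in the hunk above, which the `readElement` wrapper deleted below used to call: Matrix Market "pattern" files store only coordinates, so each entry's value is implicitly 1; otherwise the value is parsed from the line buffer and the cursor advances past the token. A simplified standalone sketch of that behavior, assuming `strtod`-style parsing; this is not the library code itself:

```cpp
// Read one value token: pattern inputs yield the constant 1, real-valued
// inputs are parsed and the line pointer is advanced past the token.
#include <cassert>
#include <cstdlib>

template <typename V>
V readValueSketch(char **linePtr, bool isPattern) {
  if (isPattern)
    return static_cast<V>(1); // no stored value in a pattern file
  char *end = nullptr;
  double v = std::strtod(*linePtr, &end);
  *linePtr = end; // advance past the consumed characters
  return static_cast<V>(v);
}

int main() {
  char buf[] = " 3.25 ";
  char *p = buf;
  assert(readValueSketch<double>(&p, /*isPattern=*/false) == 3.25);
  assert(readValueSketch<double>(&p, /*isPattern=*/true) == 1.0);
  return 0;
}
```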
- template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. 
We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. +// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. 
+ MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). 
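Before the class body below, it may help to see what `pushforward` does in the fast permutation case: level position `dim2lvl[i]` receives dimension coordinate `i`. A standalone sketch of just that branch, with made-up data:

```cpp
// Permutation case of pushforward: scatter dimension coordinates into level
// order. With dim2lvl = {1, 0}, dim coords {7, 9} become lvl coords {9, 7}.
#include <cassert>
#include <cstdint>

static void pushforwardPerm(uint64_t rank, const uint64_t *dim2lvl,
                            const uint64_t *in, uint64_t *out) {
  for (uint64_t i = 0; i < rank; ++i)
    out[dim2lvl[i]] = in[i];
}

int main() {
  const uint64_t dim2lvl[2] = {1, 0}; // dimension d maps to level dim2lvl[d]
  const uint64_t dimCoords[2] = {7, 9};
  uint64_t lvlCoords[2];
  pushforwardPerm(2, dim2lvl, dimCoords, lvlCoords);
  assert(lvlCoords[0] == 9 && lvlCoords[1] == 7);
  return 0;
}
```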
+/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. + // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
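The "avoid virtual dispatch and allocation" remark above refers to a general C++ trade-off: constructing the concrete enumerator locally lets the compiler devirtualize and inline its calls, whereas going through a base-class pointer costs a heap allocation plus an indirect call per element. A generic sketch of the pattern, with illustrative types unrelated to the real enumerators:

```cpp
// Same computation two ways: through a virtual interface vs. a concrete
// stack-allocated object the optimizer can fully inline.
#include <memory>

struct EnumeratorBase {
  virtual ~EnumeratorBase() = default;
  virtual int next() = 0;
};

struct ConcreteEnumerator final : EnumeratorBase {
  int i = 0;
  int next() override { return i++; }
};

int sumViaBase(int n) { // heap allocation plus a virtual call per element
  std::unique_ptr<EnumeratorBase> e = std::make_unique<ConcreteEnumerator>();
  int s = 0;
  for (int k = 0; k < n; ++k)
    s += e->next();
  return s;
}

int sumInlined(int n) { // concrete type on the stack: devirtualized, inlinable
  ConcreteEnumerator e;
  int s = 0;
  for (int k = 0; k < n; ++k)
    s += e.next();
  return s;
}

int main() { return sumViaBase(4) == sumInlined(4) ? 0 : 1; }
```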
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
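Concretely, a shape such as ?x32 is encoded as the constant array [0, 32]: zero stands in for every dynamic extent so the runtime reader can supply the actual size later. A plain-C++ restatement of the convention behind the `fillDimShape` declaration that follows (the real helper emits `arith.constant` index values; `kDynamicSketch` is a local stand-in for `ShapedType::kDynamic`, not the MLIR constant itself):

```cpp
// Sketch of the "dynamic size becomes zero" dim-shape convention.
#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>

constexpr int64_t kDynamicSketch = std::numeric_limits<int64_t>::min();

static std::vector<uint64_t> dimShapeSketch(const std::vector<int64_t> &shape) {
  std::vector<uint64_t> out;
  out.reserve(shape.size());
  for (int64_t sh : shape)
    out.push_back(sh == kDynamicSketch ? 0 : static_cast<uint64_t>(sh));
  return out;
}

int main() {
  // tensor<?x32xf64> yields the dim-shape buffer {0, 32}.
  auto dims = dimShapeSketch({kDynamicSketch, 32});
  assert(dims.size() == 2 && dims[0] == 0 && dims[1] == 32);
  return 0;
}
```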
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
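The allocation step that follows first collects one runtime size per dynamic dimension by indexing the reader's dimension-size buffer; static extents need no lookup. A plain-C++ restatement of that selection (the generated IR performs it with `memref.load`; using a negative marker for dynamic extents is an assumption of this sketch):

```cpp
// Pick out the runtime sizes for the dynamic dimensions only.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint64_t>
gatherDynSizes(const std::vector<int64_t> &dimShape,   // negative = dynamic
               const std::vector<uint64_t> &dimSizes) { // sizes from reader
  std::vector<uint64_t> dynSizes;
  for (size_t d = 0; d < dimShape.size(); ++d)
    if (dimShape[d] < 0) // dynamic extent: take the size the reader reported
      dynSizes.push_back(dimSizes[d]);
  return dynSizes;
}

int main() {
  // tensor<?x8x?xf64> read from a file with actual sizes 100 x 8 x 50.
  auto dyn = gatherDynSizes({-1, 8, -1}, {100, 8, 50});
  assert(dyn.size() == 2 && dyn[0] == 100 && dyn[1] == 50);
  return 0;
}
```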
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 02/14] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:17:46 -0700
Subject: [PATCH 03/14] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 1c155568802e579..a1bd6798f150b43 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,7 +38,8 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < dimRank; ++i)
@@ -58,7 +59,8 @@
   // Push backward maps from levels to dimensions.
   //
 
-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     switch (kind) {
     case MapKind::kIdentity:
       for (uint64_t i = 0; i < lvlRank; ++i)

>From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 15:57:59 -0700
Subject: [PATCH 04/14] clang-format

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 37ad3c1b042313c..0dd23ac52ac6790 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -229,7 +229,6 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> lvl2dim;
 };
 
-
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-level sparse/dense annotations. This data structure provides
 /// a bufferized form of a sparse tensor type. In contrast to generating
@@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase {
 //===----------------------------------------------------------------------===//
 
 template <typename P, typename C, typename V>
-class SparseTensorEnumerator final
-    : public SparseTensorEnumeratorBase<V> {
+class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
   using Base = SparseTensorEnumeratorBase<V>;
   using StorageImpl = SparseTensorStorage<P, C, V>;

>From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 5 Oct 2023 13:22:28 -0700
Subject: [PATCH 05/14] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader

This revision introduces a MapRef, which will support a future
generalization beyond permutations (e.g. block sparsity). This revision
also unifies the conversion/codegen paths for the sparse_tensor.new
operation from file (e.g. the readers). Note that more unification is
planned as well as general affine dim2lvl and lvl2dim (all marked with
TODOs).
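To make the new abstraction concrete before the diff, here is a minimal
usage sketch of MapRef (illustrative only, not part of the patch). It
assumes the MapRef.h header added below; the permutation and coordinate
values are made up for the example.

#include <cassert>
#include <cstdint>

#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

using mlir::sparse_tensor::MapRef;

int main() {
  // A (2,1,0) permutation: dimension d is stored at level dim2lvl[d],
  // and lvl2dim is its inverse.
  const uint64_t dim2lvl[3] = {2, 1, 0};
  const uint64_t lvl2dim[3] = {2, 1, 0};
  MapRef map(/*d=*/3, /*l=*/3, dim2lvl, lvl2dim); // inferred as kPermutation
  uint64_t dimCoords[3] = {10, 20, 30};
  uint64_t lvlCoords[3];
  uint64_t roundTrip[3];
  map.pushforward(dimCoords, lvlCoords);  // lvlCoords == {30, 20, 10}
  map.pushbackward(lvlCoords, roundTrip); // recovers {10, 20, 30}
  assert(lvlCoords[0] == 30 && roundTrip[0] == 10);
  return 0;
}

For the kIdentity and kPermutation kinds both translations are a single
indexed copy, which is what keeps the reader loops cheap; the kAffine
kind is left as a TODO ("coming soon") in this revision.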
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
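The inverse law documented on the deleted `pushforward`/`permute` pair is worth one standalone illustration. The sketch below restates the two loops outside the class (since `PermutationRef` is exactly what this patch removes) and checks the round-trip property on made-up values:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Standalone restatement of the deleted PermutationRef loops.
    std::vector<uint64_t> pushforward(const std::vector<uint64_t> &perm,
                                      const std::vector<uint64_t> &xs) {
      std::vector<uint64_t> out(xs.size());
      for (uint64_t i = 0; i < xs.size(); ++i)
        out[perm[i]] = xs[i];  // value at position i lands in slot perm[i]
      return out;
    }

    std::vector<uint64_t> permute(const std::vector<uint64_t> &perm,
                                  const std::vector<uint64_t> &xs) {
      std::vector<uint64_t> out(xs.size());
      for (uint64_t i = 0; i < xs.size(); ++i)
        out[i] = xs[perm[i]];  // slot i is filled from position perm[i]
      return out;
    }

    int main() {
      const std::vector<uint64_t> perm = {2, 0, 1};
      const std::vector<uint64_t> xs = {10, 20, 30};
      // Each direction undoes the other, as the deleted doc comment states.
      assert(permute(perm, pushforward(perm, xs)) == xs);
      assert(pushforward(perm, permute(perm, xs)) == xs);
      return 0;
    }

MapRef's pushforward/pushbackward pair preserves the same round-trip behavior for permutations, which is what makes the mechanical replacement in this patch safe.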
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
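The zero-for-dynamic convention used by `fillDimShape` above can be made concrete: for a `tensor<?x8xf32>` the generated shape buffer holds `{0, 8}`, so the checked reader must match the 8 exactly while accepting (and later reporting back) any extent for the leading dimension. A hypothetical standalone restatement of the encoding, not part of the patch:

    #include <cstdint>
    #include <vector>

    // Hypothetical mirror of fillDimShape's loop: dynamic extents
    // (negative sentinels in MLIR's ShapedType) are encoded as zero.
    std::vector<uint64_t> encodeDimShape(const std::vector<int64_t> &shape) {
      std::vector<uint64_t> out;
      out.reserve(shape.size());
      for (int64_t sh : shape)
        out.push_back(sh < 0 ? 0u : static_cast<uint64_t>(sh));
      return out;
    }
    // encodeDimShape({-1, 8}) == {0, 8} for tensor<?x8xf32>.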
+  Type opaqueTp = getOpaquePointerType(builder);
+  Type eltTp = stt.getElementType();
+  Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp);
+  Value reader =
+      createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp,
+                     {tensor, dimShapesBuffer, valTp}, EmitCInterface::On)
+          .getResult(0);
+  // For static shapes, the shape buffer can be used right away. For dynamic
+  // shapes, use the information from the reader to construct a buffer that
+  // supplies the actual size for each dynamic dimension.
+  dimSizesBuffer = dimShapesBuffer;
+  if (stt.hasDynamicDimShape()) {
+    Type indexTp = builder.getIndexType();
+    auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp);
+    dimSizesBuffer =
+        createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp,
+                       reader, EmitCInterface::On)
+            .getResult(0);
+  }
+  return reader;
+}
+
+Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc,
+                                      SparseTensorType stt,
+                                      SmallVectorImpl<Value> &dimShapesValues,
+                                      Value dimSizesBuffer,
+                                      /*out*/ Value &dim2lvlBuffer,
+                                      /*out*/ Value &lvl2dimBuffer) {
+  const Dimension dimRank = stt.getDimRank();
+  const Level lvlRank = stt.getLvlRank();
+  // For an identity mapping, the dim2lvl and lvl2dim mappings are
+  // identical, as are dimSizes and lvlSizes, so buffers are reused
+  // as much as possible.
+  if (stt.isIdentity()) {
+    assert(dimRank == lvlRank);
+    SmallVector<Value> iotaValues;
+    iotaValues.reserve(lvlRank);
+    for (Level l = 0; l < lvlRank; l++)
+      iotaValues.push_back(constantIndex(builder, loc, l));
+    dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues);
+    return dimSizesBuffer;
+  }
+  // Otherwise, some code needs to be generated to set up the buffers.
+  // TODO: use the lvl2dim once available and deal with non-permutations!
+  const auto dimToLvl = stt.getDimToLvl();
+  assert(dimToLvl.isPermutation());
+  SmallVector<Value> dim2lvlValues(dimRank);
+  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> lvlSizesValues(lvlRank);
+  for (Level l = 0; l < lvlRank; l++) {
+    // The `d`th source variable occurs in the `l`th result position.
+    Dimension d = dimToLvl.getDimPosition(l);
+    Value lvl = constantIndex(builder, loc, l);
+    Value dim = constantIndex(builder, loc, d);
+    dim2lvlValues[d] = lvl;
+    lvl2dimValues[l] = dim;
+    if (stt.isDynamicDim(d))
+      lvlSizesValues[l] =
+          builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
+    else
+      lvlSizesValues[l] = dimShapesValues[d];
+  }
+  dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
+  lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
+  return allocaBuffer(builder, loc, lvlSizesValues);
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 8145446751b9938..08ea019d8224a73 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SparseTensor/IR/Enums.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
 #include "mlir/IR/Builders.h"
@@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor,
 Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor,
                                 Dimension dim);
 
+/// Populates the array with the dimension-shape of the given
+/// `SparseTensorType`, where dynamic sizes are represented by zero.
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
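For reference while reading this pattern: given an assumed 3-d permutation, the `genReaderBuffers` loop above fills its buffers as follows (illustrative values only, written as C++ comments):

    // Worked example: dimToLvl = (d0, d1, d2) -> (d2, d0, d1), so
    // getDimPosition(l) yields  l = 0 -> d = 2,  l = 1 -> d = 0,  l = 2 -> d = 1.
    // The loop then produces
    //   dim2lvlValues = {1, 2, 0}   // dim2lvlValues[d] = l
    //   lvl2dimValues = {2, 0, 1}   // lvl2dimValues[l] = d
    // and lvlSizesValues picks dimSizes[2], dimSizes[0], dimSizes[1], i.e.
    // the dimension extents reordered into level order.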
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+  if (isIdentity()) {
+    kind = MapKind::kIdentity;
+    for (uint64_t i = 0; i < dimRank; i++)
+      assert(lvl2dim[i] == i);
+  } else if (isPermutation()) {
+    kind = MapKind::kPermutation;
+    for (uint64_t i = 0; i < dimRank; i++)
+      assert(lvl2dim[dim2lvl[i]] == i);
+  } else {
+    kind = MapKind::kAffine;
+  }
+}
+
+bool mlir::sparse_tensor::MapRef::isIdentity() const {
+  if (dimRank != lvlRank)
+    return false;
+  for (uint64_t i = 0; i < dimRank; i++) {
+    if (dim2lvl[i] != i)
+      return false;
+  }
+  return true;
+}
+
+bool mlir::sparse_tensor::MapRef::isPermutation() const {
+  if (dimRank != lvlRank)
+    return false;
+  std::vector<bool> seen(dimRank, false);
+  for (uint64_t i = 0; i < dimRank; i++) {
+    const uint64_t j = dim2lvl[i];
+    if (j >= dimRank || seen[j])
+      return false;
+    seen[j] = true;
+  }
+  return true;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 82cb6d3aeefa35f..5b910716c0f9e59 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -226,11 +226,7 @@ extern "C" {
 static_assert(std::is_same<index_type, uint64_t>::value,
               "Expected index_type == uint64_t");
 
-// TODO: this swiss-army-knife should be split up into separate functions
-// for each action, since the various actions don't agree on (1) whether
-// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl"
-// arguments are actually storage-levels vs target tensor-dimensions,
-// (3) whether all the arguments are actually used/required.
+// The Swiss-army-knife for sparse tensor creation.
 void *_mlir_ciface_newSparseTensor( // NOLINT
     StridedMemRefType<index_type, 1> *dimSizesRef,
     StridedMemRefType<index_type, 1> *lvlSizesRef,
@@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT
   ASSERT_NO_STRIDE(dimSizesRef);
   ASSERT_NO_STRIDE(lvlSizesRef);
   ASSERT_NO_STRIDE(lvlTypesRef);
-  ASSERT_NO_STRIDE(lvl2dimRef);
   ASSERT_NO_STRIDE(dim2lvlRef);
+  ASSERT_NO_STRIDE(lvl2dimRef);
   const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef);
   const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef);
-  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
   ASSERT_USIZE_EQ(lvlTypesRef, lvlRank);
+  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
   ASSERT_USIZE_EQ(lvl2dimRef, lvlRank);
   const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef);
   const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
   const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
-  const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef);
   const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef);
+  const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef);
   // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases.
   // This is safe because of the static_assert above.
@@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES)
 #undef IMPL_SPARSECOORDINATES
 #undef IMPL_GETOVERHEAD
 
-// TODO: while this API design will work for arbitrary dim2lvl mappings,
-// we should probably move the `dimCoords`-to-`lvlCoords` computation into
-// codegen (since that could enable optimizations to remove the intermediate
-// memref).
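The seen-vector test in `MapRef::isPermutation` above is the same pigeonhole check that this patch deletes from Storage.h; a standalone restatement (illustrative, not part of the patch):

    #include <cstdint>
    #include <vector>

    // Restatement of MapRef::isPermutation's core loop: every target slot
    // must be in range and hit exactly once. Since the map has as many
    // entries as slots, "no duplicates, all in range" implies surjectivity.
    bool isPermutation(uint64_t rank, const uint64_t *d2l) {
      std::vector<bool> seen(rank, false);
      for (uint64_t i = 0; i < rank; i++) {
        const uint64_t j = d2l[i];
        if (j >= rank || seen[j])  // out of range or duplicate
          return false;
        seen[j] = true;
      }
      return true;
    }

    int main() {
      const uint64_t ok[] = {2, 0, 1};   // classified kPermutation
      const uint64_t dup[] = {0, 0, 1};  // duplicate -> falls through to kAffine
      return (isPermutation(3, ok) && !isPermutation(3, dup)) ? 0 : 1;
    }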
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 06/14] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 07/14] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 08/14] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type.
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 19:55:25 -0700 Subject: [PATCH 09/14] ArrayRef --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ffb1a550957edb8..61fecdad3be9398 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc, Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapesValues, + ArrayRef dimShapesValues, Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 08ea019d8224a73..698b6c491a9aef7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, /// Generates code to set up the buffer parameters for a reader. 
Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// >From 3e13b908253c1873295fb263537eee3bd40f186e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 21:08:24 -0700 Subject: [PATCH 10/14] sort_coo -> sort --- mlir/test/Dialect/SparseTensor/codegen.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index adefceba7379f99..84904227a636327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -699,7 +699,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] // CHECK: } // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] >From e562d1ca2297ec907c719b089ce77ea7f91a28a3 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 09:48:36 -0700 Subject: [PATCH 11/14] changed header protos --- mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 861b7eff65115b6..f25df11d15fdad1 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -134,8 +134,8 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_createCheckedSparseTensorReader( MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp); /// SparseTensorReader method to obtain direct access to the @@ -149,7 +149,8 @@ MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( MLIR_CRUNNERUTILS_EXPORT bool \ _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ - StridedMemRefType *iref, StridedMemRefType *vref) \ + StridedMemRefType *lvl2dimRef, \ + StridedMemRefType *cref, StridedMemRefType *vref) \ MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) #undef DECL_GETNEXT >From 0dda2e88c0760a29a099897b1a0751513f510959 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:24:59 -0700 Subject: [PATCH 12/14] simpler MapRef --- .../ExecutionEngine/SparseTensor/MapRef.h | 53 ++++++------------- .../ExecutionEngine/SparseTensor/MapRef.cpp | 28 ++-------- 2 files changed, 21 insertions(+), 60 deletions(-) diff --git 
a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index a1bd6798f150b43..e63412498a1abb8 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -16,6 +16,7 @@ #include #include +#include namespace mlir { namespace sparse_tensor { @@ -23,12 +24,11 @@ namespace sparse_tensor { /// A class for capturing the sparse tensor type map with a compact encoding. /// /// Currently, the following situations are supported: -/// (1) map is an identity -/// (2) map is a permutation -/// (3) map has affine ops (restricted set) +/// (1) map is a permutation +/// (2) map has affine ops (restricted set) /// -/// The pushforward/backward operations are fast for (1) and (2) but -/// incur some obvious overhead for situation (3). +/// The pushforward/backward operations are fast for (1) but incur some obvious +/// overhead for situation (2). /// class MapRef final { public: @@ -38,20 +38,12 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template - inline void pushforward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < dimRank; ++i) - out[i] = in[i]; // TODO: optimize with in == out ? - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < dimRank; ++i) - out[dim2lvl[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushforward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[lvl2dim[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -59,20 +51,12 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template - inline void pushbackward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < lvlRank; ++i) - out[i] = in[i]; - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < lvlRank; ++i) - out[lvl2dim[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushbackward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[dim2lvl[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -80,16 +64,13 @@ class MapRef final { uint64_t getLvlRank() const { return lvlRank; } private: - enum class MapKind { kIdentity, kPermutation, kAffine }; - - bool isIdentity() const; - bool isPermutation() const; + bool isPermutationMap() const; - MapKind kind; const uint64_t dimRank; const uint64_t lvlRank; const uint64_t *const dim2lvl; // non-owning pointer const uint64_t *const lvl2dim; // non-owning pointer + const bool isPermutation; }; } // namespace sparse_tensor diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp index ed458afeae746bc..ee4d6fa0d34b491 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -6,39 +6,19 @@ // //===----------------------------------------------------------------------===// -#include - #include "mlir/ExecutionEngine/SparseTensor/MapRef.h" mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d) - : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { - assert(d2l && l2d); - // Determine the kind of mapping (and asserts on simple inference). 
- if (isIdentity()) { - kind = MapKind::kIdentity; - for (uint64_t i = 0; i < dimRank; i++) - assert(lvl2dim[i] == i); - } else if (isPermutation()) { - kind = MapKind::kPermutation; + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d), + isPermutation(isPermutationMap()) { + if (isPermutation) { for (uint64_t i = 0; i < dimRank; i++) assert(lvl2dim[dim2lvl[i]] == i); - } else { - kind = MapKind::kAffine; - } -} - -bool mlir::sparse_tensor::MapRef::isIdentity() const { - if (dimRank != lvlRank) - return false; - for (uint64_t i = 0; i < dimRank; i++) { - if (dim2lvl[i] != i) - return false; } - return true; } -bool mlir::sparse_tensor::MapRef::isPermutation() const { +bool mlir::sparse_tensor::MapRef::isPermutationMap() const { if (dimRank != lvlRank) return false; std::vector seen(dimRank, false); >From ea50b3820f0a7817b7add513d6b13292a1768620 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:46:18 -0700 Subject: [PATCH 13/14] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index e63412498a1abb8..22ae70a61d95eff 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { if (isPermutation) { for (uint64_t i = 0; i < lvlRank; ++i) out[i] = in[lvl2dim[i]]; @@ -51,7 +52,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { if (isPermutation) { for (uint64_t i = 0; i < dimRank; ++i) out[i] = in[dim2lvl[i]]; >From 57a73fe7d1a56cefa1f2d8d1bc43517f8d380247 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 11:14:27 -0700 Subject: [PATCH 14/14] typo --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 61fecdad3be9398..c44c5985ee7bba1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -782,7 +782,7 @@ Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, /*out*/ Value &lvl2dimBuffer) { const Dimension dimRank = stt.getDimRank(); const Level lvlRank = stt.getLvlRank(); - // For an identify mapping, the dim2lvl and lvl2dim mappings are + // For an identity mapping, the dim2lvl and lvl2dim mappings are // identical as are dimSizes and lvlSizes, so buffers are reused // as much as possible. if (stt.isIdentity()) { From lldb-commits at lists.llvm.org Fri Oct 6 11:19:54 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 11:19:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] Add `target modules dump separate-debug-info` (PR #66035) In-Reply-To: Message-ID: <65204fca.170a0220.4898a.e06b@mx.google.com> jeffreytan81 wrote: I am not a big fan of having a specialized name `separate-debug-info` to dump external debug info. 
Ideally, I would like a single/centralized command for end users to check symbols/debug info status for a target, including all possible situations. How about we rename this to a more generic command name (like `target module dump syminfo` or `target symbols dump`) which can be extended to include more than dwo/N_OSO scenarios: - Status of dwo structures (like this) - External dSYM files and .debuginfo files status (missing or not) - Embedded symbol status: like symbol table only, debug info + symbol table, debug info only + symbol table stripped. - dwp files The benefit is that this provides a central command to dump all symbol file status instead of several commands to gather info. I do not think the above should be included in this patch, but at least the command name opens the possibility for extension in the future. Another option is treating `image list` as the central command mentioned above and adding new options for `image list` to include dwo/N_OSO, external/embedded symbol/debuginfo status, etc. I believe most Windows debuggers (like windbg) use this approach. What do you guys think? https://github.com/llvm/llvm-project/pull/66035 From lldb-commits at lists.llvm.org Fri Oct 6 12:02:55 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 12:02:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] Add `target modules dump separate-debug-info` (PR #66035) In-Reply-To: Message-ID: <652059df.170a0220.a8039.c633@mx.google.com> ================ @@ -1462,6 +1464,87 @@ static bool DumpModuleSymbolFile(Stream &strm, Module *module) { return false; } +static bool GetSeparateDebugInfoList(StructuredData::Array &list, + Module *module) { + if (module) { + if (SymbolFile *symbol_file = module->GetSymbolFile(true)) { ---------------- jeffreytan81 wrote: What does `true` mean here? Add an inline comment. https://github.com/llvm/llvm-project/pull/66035 From lldb-commits at lists.llvm.org Fri Oct 6 13:40:04 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Fri, 06 Oct 2023 13:40:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <652070a4.170a0220.4deb1.ec8b@mx.google.com> https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68360 >From 6094912685a0cfa5c13e023e8ec97238a84fca2f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 01/15] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (e.g., the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs).
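Before the diff itself, here is a minimal standalone C++ sketch of the permutation case of the MapRef pushforward/pushbackward contract this patch introduces. It mirrors the class added in mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h but is only an illustration, not the MLIR implementation: the free functions, the composition through dim2lvl in pushbackward, and the 3-D example values are invented for this sketch, and the affine case (stubbed out as "coming soon" in the patch) is omitted.

#include <cassert>
#include <cstdint>
#include <cstdio>

// A dim2lvl permutation sends dimension d to level dim2lvl[d]. These two
// helpers mirror the kPermutation arms of MapRef::pushforward and
// MapRef::pushbackward from the patch.
static void pushforward(uint64_t rank, const uint64_t *dim2lvl,
                        const uint64_t *dimCoords, uint64_t *lvlCoords) {
  for (uint64_t d = 0; d < rank; ++d)
    lvlCoords[dim2lvl[d]] = dimCoords[d]; // level dim2lvl[d] takes dim d's coord
}

static void pushbackward(uint64_t rank, const uint64_t *dim2lvl,
                         const uint64_t *lvlCoords, uint64_t *dimCoords) {
  for (uint64_t d = 0; d < rank; ++d)
    dimCoords[d] = lvlCoords[dim2lvl[d]]; // composes through dim2lvl rather
                                          // than using a stored lvl2dim inverse
}

int main() {
  // Invented example: a 3-D permutation storing dims (d0,d1,d2) as levels
  // (l1,l2,l0), i.e. dim2lvl = {1,2,0}.
  const uint64_t dim2lvl[3] = {1, 2, 0};
  const uint64_t dimCoords[3] = {7, 8, 9};
  uint64_t lvlCoords[3];
  uint64_t roundTrip[3];
  pushforward(3, dim2lvl, dimCoords, lvlCoords);  // lvlCoords == {9, 7, 8}
  pushbackward(3, dim2lvl, lvlCoords, roundTrip); // roundTrip == {7, 8, 9}
  for (uint64_t i = 0; i < 3; ++i)
    assert(roundTrip[i] == dimCoords[i]); // pushbackward inverts pushforward
  std::printf("lvl coords: %llu %llu %llu\n",
              (unsigned long long)lvlCoords[0],
              (unsigned long long)lvlCoords[1],
              (unsigned long long)lvlCoords[2]);
  return 0;
}

Unlike this sketch, the class in the patch stores the precomputed inverse lvl2dim alongside dim2lvl, which is why its constructor asserts lvl2dim[dim2lvl[i]] == i for permutation maps.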
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template SparseTensorStorage * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO(map, lvlSizes); auto *tensor = SparseTensorStorage::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template + SparseTensorCOO *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods. 
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From d54b03e367ed34ebea5a0b06c6c6f2e4a04b93b7 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 02/15] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
5ecff8cfae4fb7790d41ac3e07a6b2dbb3a47403 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 03/15] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template <typename T> inline void pushforward(const T *in, T *out) const { + template <typename T> + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template <typename T> inline void pushbackward(const T *in, T *out) const { + template <typename T> + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 60cbc0a3c3cd3ee66b331183d42d33b9034e617c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 04/15] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector<uint64_t> lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template <typename P, typename C, typename V> -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase<V> { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> { using Base = SparseTensorEnumeratorBase<V>; using StorageImpl = SparseTensorStorage<P, C, V>; >From c8155c21509a09e70e167b2f8182e3a7d6709025 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 13:22:28 -0700 Subject: [PATCH 05/15] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g. block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (e.g. the readers). Note that more unification is planned as well as general affine dim2lvl and lvl2dim (all marked with TODOs).
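Aside for reviewers: a minimal standalone sketch (not part of the patch series) of how the new MapRef behaves for a simple 2-d permutation, using only the constructor and pushforward/pushbackward signatures introduced below; the main() driver and the concrete coordinate values are illustrative only.

  #include <cassert>
  #include <cstdint>

  #include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

  using mlir::sparse_tensor::MapRef;

  int main() {
    // A CSC-style permutation on a 2-d tensor: dimension d occurs at
    // level dim2lvl[d], and level l stores dimension lvl2dim[l].
    const uint64_t dim2lvl[] = {1, 0};
    const uint64_t lvl2dim[] = {1, 0};
    MapRef map(/*dimRank=*/2, /*lvlRank=*/2, dim2lvl, lvl2dim);

    // Translate dim-coordinates (3, 7) into lvl-coordinates (7, 3), as
    // the reader now does per parsed element: out[dim2lvl[i]] = in[i].
    uint64_t dimCoords[2] = {3, 7};
    uint64_t lvlCoords[2];
    map.pushforward(dimCoords, lvlCoords);
    assert(lvlCoords[0] == 7 && lvlCoords[1] == 3);

    // And back again from levels to dimensions: out[lvl2dim[i]] = in[i].
    uint64_t back[2];
    map.pushbackward(lvlCoords, back);
    assert(back[0] == 3 && back[1] == 7);
    return 0;
  }

The identity kind degenerates to a plain copy in both directions, and the affine kind still asserts ("coming soon"), which is why the codegen path in this patch keeps asserting dimToLvl.isPermutation() for now.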
--- .../mlir/ExecutionEngine/SparseTensor/File.h | 156 ++++++---------- .../ExecutionEngine/SparseTensor/MapRef.h | 96 ++++++++++ .../ExecutionEngine/SparseTensor/Storage.h | 108 +---------- .../ExecutionEngine/SparseTensorRuntime.h | 8 - .../SparseTensor/Transforms/CodegenUtils.cpp | 89 +++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 18 ++ .../Transforms/SparseTensorCodegen.cpp | 73 ++------ .../Transforms/SparseTensorConversion.cpp | 111 ++--------- .../SparseTensor/CMakeLists.txt | 1 + .../ExecutionEngine/SparseTensor/MapRef.cpp | 52 ++++++ .../ExecutionEngine/SparseTensorRuntime.cpp | 60 +++--- mlir/test/Dialect/SparseTensor/codegen.mlir | 172 +++++++++--------- .../test/Dialect/SparseTensor/conversion.mlir | 18 +- 13 files changed, 475 insertions(+), 487 deletions(-) create mode 100644 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h create mode 100644 mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 78c1a0544e3a521..9157bfa7e773239 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -20,6 +20,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) { } // namespace detail +//===----------------------------------------------------------------------===// +// +// Reader class. +// //===----------------------------------------------------------------------===// /// This class abstracts over the information stored in file headers, @@ -132,6 +137,7 @@ class SparseTensorReader final { /// Reads and parses the file's header. void readHeader(); + /// Returns the stored value kind. ValueKind getValueKind() const { return valueKind_; } /// Checks if a header has been successfully read. @@ -185,58 +191,37 @@ class SparseTensorReader final { /// valid after parsing the header. void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; - /// Reads a sparse tensor element from the next line in the input file and - /// returns the value of the element. Stores the coordinates of the element - /// to the `dimCoords` array. - template - V readElement(uint64_t dimRank, uint64_t *dimCoords) { - assert(dimRank == getRank() && "rank mismatch"); - char *linePtr = readCoords(dimCoords); - return detail::readValue(&linePtr, isPattern()); - } - - /// Allocates a new COO object for `lvlSizes`, initializes it by reading - /// all the elements from the file and applying `dim2lvl` to their - /// dim-coordinates, and then closes the file. Templated on V only. - template - SparseTensorCOO *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes, - const uint64_t *dim2lvl); - /// Allocates a new sparse-tensor storage object with the given encoding, /// initializes it by reading all the elements from the file, and then /// closes the file. Templated on P, I, and V. 
template <typename P, typename I, typename V> SparseTensorStorage<P, I, V> * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, - const DimLevelType *lvlTypes, const uint64_t *lvl2dim, - const uint64_t *dim2lvl) { - auto *lvlCOO = readCOO<V>(lvlRank, lvlSizes, dim2lvl); + const DimLevelType *lvlTypes, const uint64_t *dim2lvl, + const uint64_t *lvl2dim) { + const uint64_t dimRank = getRank(); + MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim); + auto *coo = readCOO<V>(map, lvlSizes); auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO( - getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO); - delete lvlCOO; + dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo); + delete coo; return tensor; } /// Reads the COO tensor from the file, stores the coordinates and values to /// the given buffers, returns a boolean value to indicate whether the COO /// elements are sorted. - /// Precondition: the buffers should have enough space to hold the elements. template <typename C, typename V> bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, - C *lvlCoordinates, V *values); + const uint64_t *lvl2dim, C *lvlCoordinates, V *values); private: - /// Attempts to read a line from the file. Is private because there's - /// no reason for client code to call it. + /// Attempts to read a line from the file. void readLine(); /// Reads the next line of the input file and parses the coordinates /// into the `dimCoords` argument. Returns the position in the `line` - /// buffer where the element's value should be parsed from. This method - /// has been factored out from `readElement` to minimize code bloat - /// for the generated library. - /// - /// Precondition: `dimCoords` is valid for `getRank()`. + /// buffer where the element's value should be parsed from. template <typename C> char *readCoords(C *dimCoords) { readLine(); @@ -251,24 +236,20 @@ class SparseTensorReader final { return linePtr; } - /// The internal implementation of `readCOO`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. - // - // TODO: We currently take the `dim2lvl` argument as a `PermutationRef` - // since that's what `readCOO` creates. Once we update `readCOO` to - // functionalize the mapping, then this helper will just take that - // same function. + /// Reads all the elements from the file while applying the given map. + template <typename V> + SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes); + + /// The implementation of `readCOO` that is templated over `IsPattern` in order + /// to perform LICM without needing to duplicate the source code. template <typename V, bool IsPattern> - void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - SparseTensorCOO<V> *lvlCOO); + void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo); - /// The internal implementation of `readToBuffers`. We template over - /// `IsPattern` in order to perform LICM without needing to duplicate the - /// source code. + /// The internal implementation of `readToBuffers`. We template over + /// `IsPattern` in order to perform LICM without needing to duplicate + /// the source code. template <typename C, typename V, bool IsPattern> - bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values); + bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values); /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -288,96 +269,76 @@ class SparseTensorReader final { char line[kColWidth]; }; +//===----------------------------------------------------------------------===// +// +// Reader class methods.
+// //===----------------------------------------------------------------------===// template -SparseTensorCOO *SparseTensorReader::readCOO(uint64_t lvlRank, - const uint64_t *lvlSizes, - const uint64_t *dim2lvl) { +SparseTensorCOO *SparseTensorReader::readCOO(const MapRef &map, + const uint64_t *lvlSizes) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); // Prepare a COO object with the number of stored elems as initial capacity. - auto *lvlCOO = new SparseTensorCOO(lvlRank, lvlSizes, getNSE()); - // Do some manual LICM, to avoid assertions in the for-loop. - const bool IsPattern = isPattern(); - if (IsPattern) - readCOOLoop(lvlRank, d2l, lvlCOO); + auto *coo = new SparseTensorCOO(map.getLvlRank(), lvlSizes, getNSE()); + // Enter the reading loop. + if (isPattern()) + readCOOLoop(map, coo); else - readCOOLoop(lvlRank, d2l, lvlCOO); + readCOOLoop(map, coo); // Close the file and return the COO. closeFile(); - return lvlCOO; + return coo; } template -void SparseTensorReader::readCOOLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - SparseTensorCOO *lvlCOO) { - const uint64_t dimRank = getRank(); +void SparseTensorReader::readCOOLoop(const MapRef &map, + SparseTensorCOO *coo) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); std::vector lvlCoords(lvlRank); - for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) { - // We inline `readElement` here in order to avoid redundant - // assertions, since they're guaranteed by the call to `isValid()` - // and the construction of `dimCoords` above. + for (uint64_t k = 0, nse = getNSE(); k < nse; k++) { char *linePtr = readCoords(dimCoords.data()); const V value = detail::readValue(&linePtr); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data()); - // TODO: - lvlCOO->add(lvlCoords, value); + map.pushforward(dimCoords.data(), lvlCoords.data()); + coo->add(lvlCoords, value); } } template bool SparseTensorReader::readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, + const uint64_t *lvl2dim, C *lvlCoordinates, V *values) { assert(isValid() && "Attempt to readCOO() before readHeader()"); - // Construct a `PermutationRef` for the `pushforward` below. - // TODO: This specific implementation does not generalize to arbitrary - // mappings, but once we functionalize the `dim2lvl` argument we can - // simply use that function instead. - const uint64_t dimRank = getRank(); - assert(lvlRank == dimRank && "Rank mismatch"); - detail::PermutationRef d2l(dimRank, dim2lvl); - // Do some manual LICM, to avoid assertions in the for-loop. + MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim); bool isSorted = - isPattern() - ? readToBuffersLoop(lvlRank, d2l, lvlCoordinates, values) - : readToBuffersLoop(lvlRank, d2l, lvlCoordinates, - values); - - // Close the file and return isSorted. + isPattern() ? 
readToBuffersLoop(map, lvlCoordinates, values) + : readToBuffersLoop(map, lvlCoordinates, values); closeFile(); return isSorted; } template -bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, - detail::PermutationRef dim2lvl, - C *lvlCoordinates, V *values) { - const uint64_t dimRank = getRank(); +bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates, + V *values) { + const uint64_t dimRank = map.getDimRank(); + const uint64_t lvlRank = map.getLvlRank(); const uint64_t nse = getNSE(); + assert(dimRank == getRank()); std::vector dimCoords(dimRank); - // Read the first element with isSorted=false as a way to avoid accessing its - // previous element. bool isSorted = false; char *linePtr; - // We inline `readElement` here in order to avoid redundant assertions, - // since they're guaranteed by the call to `isValid()` and the construction - // of `dimCoords` above. const auto readNextElement = [&]() { linePtr = readCoords(dimCoords.data()); - dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates); + map.pushforward(dimCoords.data(), lvlCoordinates); *values = detail::readValue(&linePtr); if (isSorted) { - // Note that isSorted was set to false while reading the first element, + // Note that isSorted is set to false when reading the first element, // to guarantee the safeness of using prevLvlCoords. C *prevLvlCoords = lvlCoordinates - lvlRank; - // TODO: define a new CoordsLT which is like ElementLT but doesn't have - // the V parameter, and use it here. for (uint64_t l = 0; l < lvlRank; ++l) { if (prevLvlCoords[l] != lvlCoordinates[l]) { if (prevLvlCoords[l] > lvlCoordinates[l]) @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank, isSorted = true; for (uint64_t n = 1; n < nse; ++n) readNextElement(); - return isSorted; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h new file mode 100644 index 000000000000000..1c155568802e579 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -0,0 +1,96 @@ +//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A dim2lvl/lvl2dim map encoding class, with utility methods. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H + +#include + +#include + +namespace mlir { +namespace sparse_tensor { + +/// A class for capturing the sparse tensor type map with a compact encoding. +/// +/// Currently, the following situations are supported: +/// (1) map is an identity +/// (2) map is a permutation +/// (3) map has affine ops (restricted set) +/// +/// The pushforward/backward operations are fast for (1) and (2) but +/// incur some obvious overhead for situation (3). +/// +class MapRef final { +public: + MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d); + + // + // Push forward maps from dimensions to levels. 
+ // + + template inline void pushforward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[i]; // TODO: optimize with in == out ? + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < dimRank; ++i) + out[dim2lvl[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + // + // Push backward maps from levels to dimensions. + // + + template inline void pushbackward(const T *in, T *out) const { + switch (kind) { + case MapKind::kIdentity: + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[i]; + break; + case MapKind::kPermutation: + for (uint64_t i = 0; i < lvlRank; ++i) + out[lvl2dim[i]] = in[i]; + break; + case MapKind::kAffine: + assert(0 && "coming soon"); + break; + } + } + + uint64_t getDimRank() const { return dimRank; } + uint64_t getLvlRank() const { return lvlRank; } + +private: + enum class MapKind { kIdentity, kPermutation, kAffine }; + + bool isIdentity() const; + bool isPermutation() const; + + MapKind kind; + const uint64_t dimRank; + const uint64_t lvlRank; + const uint64_t *const dim2lvl; // non-owning pointer + const uint64_t *const lvl2dim; // non-owning pointer +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 28c28c28109c3c7..37ad3c1b042313c 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase; template class SparseTensorEnumerator; -namespace detail { - -/// Checks whether the `perm` array is a permutation of `[0 .. size)`. -inline bool isPermutation(uint64_t size, const uint64_t *perm) { - assert(perm && "Got nullptr for permutation"); - std::vector seen(size, false); - for (uint64_t i = 0; i < size; ++i) { - const uint64_t j = perm[i]; - if (j >= size || seen[j]) - return false; - seen[j] = true; - } - for (uint64_t i = 0; i < size; ++i) - if (!seen[i]) - return false; - return true; -} - -/// Wrapper around `isPermutation` to ensure consistent error messages. -inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { -#ifndef NDEBUG - if (!isPermutation(size, perm)) - MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); -#endif -} - -/// A class for capturing the knowledge that `isPermutation` is true. -class PermutationRef final { -public: - /// Asserts `isPermutation` and returns the witness to that being true. - explicit PermutationRef(uint64_t size, const uint64_t *perm) - : permSize(size), perm(perm) { - assertIsPermutation(size, perm); - } - - uint64_t size() const { return permSize; } - - const uint64_t *data() const { return perm; } - - const uint64_t &operator[](uint64_t i) const { - assert(i < permSize && "index is out of bounds"); - return perm[i]; - } - - /// Constructs a pushforward array of values. 
This method is the inverse - /// of `permute` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector pushforward(const std::vector &values) const { - return pushforward(values.size(), values.data()); - } - - template - inline std::vector pushforward(uint64_t size, const T *values) const { - std::vector out(permSize); - pushforward(size, values, out.data()); - return out; - } - - template - inline void pushforward(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[perm[i]] = values[i]; - } - - /// Constructs a permuted array of values. This method is the inverse - /// of `pushforward` in the sense that for all `p` and `xs` we have: - /// * `p.permute(p.pushforward(xs)) == xs` - /// * `p.pushforward(p.permute(xs)) == xs` - template - inline std::vector permute(const std::vector &values) const { - return permute(values.size(), values.data()); - } - - template - inline std::vector permute(uint64_t size, const T *values) const { - std::vector out(permSize); - permute(size, values, out.data()); - return out; - } - - template - inline void permute(uint64_t size, const T *values, T *out) const { - assert(size == permSize && "size mismatch"); - for (uint64_t i = 0; i < permSize; ++i) - out[i] = values[perm[i]]; - } - -private: - const uint64_t permSize; - const uint64_t *const perm; // non-owning pointer. -}; - -} // namespace detail - /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -263,7 +166,7 @@ class SparseTensorStorageBase { bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } /// Allocates a new enumerator. Callers must make sure to delete - /// the enumerator when they're done with it. The first argument + /// the enumerator when they're done with it. The first argument /// is the out-parameter for storing the newly allocated enumerator; /// all other arguments are passed along to the `SparseTensorEnumerator` /// ctor and must satisfy the preconditions/assertions thereof. @@ -326,6 +229,7 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; + /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. In contrast to generating @@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *lvlTypes, const uint64_t *lvl2dim, const intptr_t *lvlBufs); - /// Allocates a new empty sparse tensor. The preconditions/assertions + /// Allocates a new empty sparse tensor. The preconditions/assertions /// are as per the `SparseTensorStorageBase` ctor; which is to say, /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", /// since there's nowhere to reconstruct dynamic sizes from. @@ -577,6 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, uint64_t srcRank, const uint64_t *src2trg) const { // We inline `newEnumerator` to avoid virtual dispatch and allocation. 
+ // TODO: use MapRef here too for the translation SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, srcRank, src2trg); auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); @@ -733,7 +638,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } } - /// Continues a single insertion path, outer to inner. The first + /// Continues a single insertion path, outer to inner. The first /// argument is the level-coordinates for the value being inserted. void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full, V val) { @@ -875,7 +780,8 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 8f320f04f23fc84..861b7eff65115b6 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -143,14 +143,6 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( StridedMemRefType *out, void *p); -/// Returns the next element for the sparse tensor being read. -#define DECL_GETNEXT(VNAME, V) \ - MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref); -MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) -#undef DECL_GETNEXT - /// Reads the sparse tensor, stores the coordinates and values to the given /// memrefs. Returns a boolean to indicate whether the COO elements are sorted. #define DECL_GETNEXT(VNAME, V, CNAME, C) \ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ce77d7a519877d6..ffb1a550957edb8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -729,3 +729,92 @@ Value sparse_tensor::createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, return constantIndex(builder, loc, *stride); return builder.create(loc, tensor, APInt(64, dim)); } + +void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &out) { + out.clear(); + out.reserve(stt.getDimRank()); + for (const DynSize sh : stt.getDimShape()) { + const auto s = ShapedType::isDynamic(sh) ? 0 : sh; + out.push_back(constantIndex(builder, loc, s)); + } +} + +Value sparse_tensor::genReader(OpBuilder &builder, Location loc, + SparseTensorType stt, Value tensor, + /*out*/ SmallVectorImpl &dimShapesValues, + /*out*/ Value &dimSizesBuffer) { + // Construct the dimShapes buffer. The buffer contains the static size + // per dimension, or otherwise a zero for a dynamic size. + fillDimShape(builder, loc, stt, dimShapesValues); + Value dimShapesBuffer = allocaBuffer(builder, loc, dimShapesValues); + // Create the `CheckedSparseTensorReader`. This reader performs a + // consistency check on the static sizes, but accepts any size + // of each dimension with a dynamic size. 
+ Type opaqueTp = getOpaquePointerType(builder); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(builder, loc, eltTp); + Value reader = + createFuncCall(builder, loc, "createCheckedSparseTensorReader", opaqueTp, + {tensor, dimShapesBuffer, valTp}, EmitCInterface::On) + .getResult(0); + // For static shapes, the shape buffer can be used right away. For dynamic + // shapes, use the information from the reader to construct a buffer that + // supplies the actual size for each dynamic dimension. + dimSizesBuffer = dimShapesBuffer; + if (stt.hasDynamicDimShape()) { + Type indexTp = builder.getIndexType(); + auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); + dimSizesBuffer = + createFuncCall(builder, loc, "getSparseTensorReaderDimSizes", memTp, + reader, EmitCInterface::On) + .getResult(0); + } + return reader; +} + +Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, + SparseTensorType stt, + SmallVectorImpl &dimShapesValues, + Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer) { + const Dimension dimRank = stt.getDimRank(); + const Level lvlRank = stt.getLvlRank(); + // For an identify mapping, the dim2lvl and lvl2dim mappings are + // identical as are dimSizes and lvlSizes, so buffers are reused + // as much as possible. + if (stt.isIdentity()) { + assert(dimRank == lvlRank); + SmallVector iotaValues; + iotaValues.reserve(lvlRank); + for (Level l = 0; l < lvlRank; l++) + iotaValues.push_back(constantIndex(builder, loc, l)); + dim2lvlBuffer = lvl2dimBuffer = allocaBuffer(builder, loc, iotaValues); + return dimSizesBuffer; + } + // Otherwise, some code needs to be generated to set up the buffers. + // TODO: use the lvl2dim once available and deal with non-permutations! + const auto dimToLvl = stt.getDimToLvl(); + assert(dimToLvl.isPermutation()); + SmallVector dim2lvlValues(dimRank); + SmallVector lvl2dimValues(lvlRank); + SmallVector lvlSizesValues(lvlRank); + for (Level l = 0; l < lvlRank; l++) { + // The `d`th source variable occurs in the `l`th result position. + Dimension d = dimToLvl.getDimPosition(l); + Value lvl = constantIndex(builder, loc, l); + Value dim = constantIndex(builder, loc, d); + dim2lvlValues[d] = lvl; + lvl2dimValues[l] = dim; + if (stt.isDynamicDim(d)) + lvlSizesValues[l] = + builder.create(loc, dimSizesBuffer, dim); + else + lvlSizesValues[l] = dimShapesValues[d]; + } + dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues); + lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues); + return allocaBuffer(builder, loc, lvlSizesValues); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 8145446751b9938..08ea019d8224a73 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Builders.h" @@ -341,6 +342,23 @@ Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim); +/// Populates the array with the dimension-shape of the given +/// `SparseTensorType`, where dynamic sizes are represented by zero. 
+void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &out); + +/// Generates code that opens a reader and sets the dimension sizes. +Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, + Value tensor, + /*out*/ SmallVectorImpl &dimShapeValues, + /*out*/ Value &dimSizesBuffer); + +/// Generates code to set up the buffer parameters for a reader. +Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, + SmallVectorImpl &dimShapeValues, + Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + /*out*/ Value &lvl2dimBuffer); + //===----------------------------------------------------------------------===// // Inlined constant generators. // diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index 7c362c086623b42..2c03f0a6020e6a8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1428,7 +1428,7 @@ struct SparseDisassembleOpConverter } }; -struct SparseNewOpConverter : public OpConversionPattern { +struct SparseNewConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, @@ -1440,7 +1440,7 @@ struct SparseNewOpConverter : public OpConversionPattern { if (!dstTp.hasEncoding() || getCOOStart(dstTp.getEncoding()) != 0) return failure(); - // Implement the NewOp(filename) as follows: + // Implement as follows: // %reader = @createCheckedSparseTensorReader(%filename) // %nse = @getSparseTensorNSE(%reader) // %coo = bufferization.alloc_tensor an ordered COO with @@ -1451,74 +1451,39 @@ struct SparseNewOpConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) + SmallVector dimShapesValues; + Value dimSizesBuffer; + Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - const Type opaqueTp = getOpaquePointerType(rewriter); - const Value fileName = op.getSource(); - SmallVector dimShapeValues; - for (const DynSize sh : dstTp.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - dimShapeValues.push_back(constantIndex(rewriter, loc, s)); - } - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, dstTp.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, {fileName, dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); + // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); - const Dimension dimRank = dstTp.getDimRank(); - const Level lvlRank = dstTp.getLvlRank(); + Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", + {indexTp}, {reader}, EmitCInterface::Off) + .getResult(0); - // If the result tensor has dynamic dimensions, get the dynamic sizes from - // the sparse tensor reader. + // Construct allocation for each field. 
SmallVector dynSizes; if (dstTp.hasDynamicDimShape()) { - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - Value dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); for (const auto &d : llvm::enumerate(dstTp.getDimShape())) if (ShapedType::isDynamic(d.value())) dynSizes.push_back(rewriter.create( loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); } - - // Get the number of stored entries. - Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE", - {indexTp}, {reader}, EmitCInterface::Off) - .getResult(0); - // Construct allocation for each field. SmallVector fields; createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, fields, nse); MutSparseTensorDescriptor desc(dstTp, fields); - // Construct the `dimToLvl` buffer for handing off to the runtime library. - SmallVector dimToLvlValues(dimRank); - if (!dstTp.isIdentity()) { - const auto dimToLvl = dstTp.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - for (Level l = 0; l < lvlRank; l++) { - const Dimension d = dimToLvl.getDimPosition(l); - dimToLvlValues[d] = constantIndex(rewriter, loc, l); - } - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - for (Dimension d = 0; d < dimRank; d++) - dimToLvlValues[d] = constantIndex(rewriter, loc, d); - } - Value dimToLvl = allocaBuffer(rewriter, loc, dimToLvlValues); + // Now construct the dim2lvl and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + genReaderBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Read the COO tensor data. Value xs = desc.getAOSMemRef(); Value ys = desc.getValMemRef(); - const Type boolTp = rewriter.getIntegerType(1); const Type elemTp = dstTp.getElementType(); const Type crdTp = dstTp.getCrdType(); @@ -1527,11 +1492,13 @@ struct SparseNewOpConverter : public OpConversionPattern { primaryTypeFunctionSuffix(elemTp)}; Value isSorted = createFuncCall(rewriter, loc, readToBuffersFuncName, {boolTp}, - {reader, dimToLvl, xs, ys}, EmitCInterface::On) + {reader, dim2lvlBuffer, lvl2dimBuffer, xs, ys}, + EmitCInterface::On) .getResult(0); // If the destination tensor is a sorted COO, we need to sort the COO tensor // data if the input elements aren't sorted yet. + const Level lvlRank = dstTp.getLvlRank(); if (dstTp.isOrderedLvl(lvlRank - 1)) { Value kFalse = constantI1(rewriter, loc, false); Value notSorted = rewriter.create( @@ -1593,7 +1560,7 @@ void mlir::populateSparseTensorCodegenPatterns( StorageSpecifierKind::DimStride>, SparseToPositionsConverter, SparseToCoordinatesConverter, SparseToCoordinatesBufferConverter, SparseToValuesConverter, - SparseConvertConverter, SparseNewOpConverter, + SparseConvertConverter, SparseNewConverter, SparseNumberOfEntriesConverter>(typeConverter, patterns.getContext()); patterns.add( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a3361c2cd48c6dd..eb0c5160e8d6193 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -46,8 +46,7 @@ static std::optional convertSparseTensorTypes(Type type) { return std::nullopt; } -/// Replaces the `op` with a `CallOp` to the function reference returned -/// by `getFunc()`. 
+/// Replaces the `op` with a `CallOp` to the `getFunc()` function reference. static func::CallOp replaceOpWithFuncCall(RewriterBase &rewriter, Operation *op, StringRef name, TypeRange resultType, ValueRange operands, @@ -141,27 +140,6 @@ static SmallVector getDimSizes(OpBuilder &builder, Location loc, return out; } -/// Populates the array with the dimension-shape of the given -/// `SparseTensorType`, where dynamic sizes are represented by zero. -static void fillDimShape(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &out) { - out.clear(); - out.reserve(stt.getDimRank()); - for (const DynSize sh : stt.getDimShape()) { - const auto s = ShapedType::isDynamic(sh) ? 0 : sh; - out.push_back(constantIndex(builder, loc, s)); - } -} - -/// Returns an array with the dimension-shape of the given `SparseTensorType`, -/// where dynamic sizes are represented by zero. -static SmallVector getDimShape(OpBuilder &builder, Location loc, - SparseTensorType stt) { - SmallVector out; - fillDimShape(builder, loc, stt, out); - return out; -} - /// Generates an uninitialized buffer of the given size and type, /// but returns it as type `memref` (rather than as type /// `memref<$sz x $tp>`). Unlike temporary buffers on the stack, @@ -503,84 +481,27 @@ class SparseTensorNewConverter : public OpConversionPattern { const auto stt = getSparseTensorType(op); if (!stt.hasEncoding()) return failure(); - const Dimension dimRank = stt.getDimRank(); - const Level lvlRank = stt.getLvlRank(); - // Construct the dimShape. - SmallVector dimShapeValues = getDimShape(rewriter, loc, stt); - Value dimShapeBuffer = allocaBuffer(rewriter, loc, dimShapeValues); - // Allocate `SparseTensorReader` and perform all initial setup that - // does not depend on lvlSizes (nor dimToLvl, lvlToDim, etc). - Type opaqueTp = getOpaquePointerType(rewriter); - Value valTp = - constantPrimaryTypeEncoding(rewriter, loc, stt.getElementType()); - Value reader = - createFuncCall(rewriter, loc, "createCheckedSparseTensorReader", - opaqueTp, - {adaptor.getOperands()[0], dimShapeBuffer, valTp}, - EmitCInterface::On) - .getResult(0); - // Construct the lvlSizes. If the dimShape is static, then it's - // identical to dimSizes: so we can compute lvlSizes entirely at - // compile-time. If dimShape is dynamic, then we'll need to generate - // code for computing lvlSizes from the `reader`'s actual dimSizes. - // - // TODO: For now we're still assuming `dimToLvl` is a permutation. - // But since we're computing lvlSizes here (rather than in the runtime), - // we can easily generalize that simply by adjusting this code. - // - // FIXME: reduce redundancy vs `NewCallParams::genBuffers`. + // Construct the reader opening method calls. + SmallVector dimShapesValues; Value dimSizesBuffer; - if (stt.hasDynamicDimShape()) { - Type indexTp = rewriter.getIndexType(); - auto memTp = MemRefType::get({ShapedType::kDynamic}, indexTp); - dimSizesBuffer = - createFuncCall(rewriter, loc, "getSparseTensorReaderDimSizes", memTp, - reader, EmitCInterface::On) - .getResult(0); - } - Value lvlSizesBuffer; - Value lvlToDimBuffer; - Value dimToLvlBuffer; - if (!stt.isIdentity()) { - const auto dimToLvl = stt.getDimToLvl(); - assert(dimToLvl.isPermutation() && "Got non-permutation"); - // We preinitialize `dimToLvlValues` since we need random-access writing. - // And we preinitialize the others for stylistic consistency. 
- SmallVector lvlSizeValues(lvlRank); - SmallVector lvlToDimValues(lvlRank); - SmallVector dimToLvlValues(dimRank); - for (Level l = 0; l < lvlRank; l++) { - // The `d`th source variable occurs in the `l`th result position. - Dimension d = dimToLvl.getDimPosition(l); - Value lvl = constantIndex(rewriter, loc, l); - Value dim = constantIndex(rewriter, loc, d); - dimToLvlValues[d] = lvl; - lvlToDimValues[l] = dim; - lvlSizeValues[l] = - stt.isDynamicDim(d) - ? rewriter.create(loc, dimSizesBuffer, dim) - : dimShapeValues[d]; - } - lvlSizesBuffer = allocaBuffer(rewriter, loc, lvlSizeValues); - lvlToDimBuffer = allocaBuffer(rewriter, loc, lvlToDimValues); - dimToLvlBuffer = allocaBuffer(rewriter, loc, dimToLvlValues); - } else { - // The `SparseTensorType` ctor already ensures `dimRank == lvlRank` - // when `isIdentity`; so no need to re-assert it here. - SmallVector iotaValues; - iotaValues.reserve(lvlRank); - for (Level l = 0; l < lvlRank; l++) - iotaValues.push_back(constantIndex(rewriter, loc, l)); - lvlSizesBuffer = dimSizesBuffer ? dimSizesBuffer : dimShapeBuffer; - dimToLvlBuffer = lvlToDimBuffer = allocaBuffer(rewriter, loc, iotaValues); - } + Value reader = genReader(rewriter, loc, stt, adaptor.getOperands()[0], + dimShapesValues, dimSizesBuffer); + // Now construct the lvlSizes, dim2lvl, and lvl2dim buffers. + Value dim2lvlBuffer; + Value lvl2dimBuffer; + Value lvlSizesBuffer = + genReaderBuffers(rewriter, loc, stt, dimShapesValues, dimSizesBuffer, + dim2lvlBuffer, lvl2dimBuffer); // Use the `reader` to parse the file. + Type opaqueTp = getOpaquePointerType(rewriter); + Type eltTp = stt.getElementType(); + Value valTp = constantPrimaryTypeEncoding(rewriter, loc, eltTp); SmallVector params{ reader, lvlSizesBuffer, genLvlTypesBuffer(rewriter, loc, stt), - lvlToDimBuffer, - dimToLvlBuffer, + dim2lvlBuffer, + lvl2dimBuffer, constantPosTypeEncoding(rewriter, loc, stt.getEncoding()), constantCrdTypeEncoding(rewriter, loc, stt.getEncoding()), valTp}; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt index 085d83634a702a8..c48af17b2d94bb7 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -7,6 +7,7 @@ # that is reserved/intended for shared libraries only. add_mlir_library(MLIRSparseTensorRuntime File.cpp + MapRef.cpp NNZ.cpp Storage.cpp diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp new file mode 100644 index 000000000000000..ed458afeae746bc --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -0,0 +1,52 @@ +//===- MapRef.cpp - A dim2lvl/lvl2dim map reference wrapper ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "mlir/ExecutionEngine/SparseTensor/MapRef.h" + +mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, + const uint64_t *l2d) + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { + assert(d2l && l2d); + // Determine the kind of mapping (and asserts on simple inference). 
+ if (isIdentity()) { + kind = MapKind::kIdentity; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[i] == i); + } else if (isPermutation()) { + kind = MapKind::kPermutation; + for (uint64_t i = 0; i < dimRank; i++) + assert(lvl2dim[dim2lvl[i]] == i); + } else { + kind = MapKind::kAffine; + } +} + +bool mlir::sparse_tensor::MapRef::isIdentity() const { + if (dimRank != lvlRank) + return false; + for (uint64_t i = 0; i < dimRank; i++) { + if (dim2lvl[i] != i) + return false; + } + return true; +} + +bool mlir::sparse_tensor::MapRef::isPermutation() const { + if (dimRank != lvlRank) + return false; + std::vector seen(dimRank, false); + for (uint64_t i = 0; i < dimRank; i++) { + const uint64_t j = dim2lvl[i]; + if (j >= dimRank || seen[j]) + return false; + seen[j] = true; + } + return true; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index 82cb6d3aeefa35f..5b910716c0f9e59 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -226,11 +226,7 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -// TODO: this swiss-army-knife should be split up into separate functions -// for each action, since the various actions don't agree on (1) whether -// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" -// arguments are actually storage-levels vs target tensor-dimensions, -// (3) whether all the arguments are actually used/required. +// The Swiss-army-knife for sparse tensor creation. void *_mlir_ciface_newSparseTensor( // NOLINT StridedMemRefType *dimSizesRef, StridedMemRefType *lvlSizesRef, @@ -241,18 +237,18 @@ void *_mlir_ciface_newSparseTensor( // NOLINT ASSERT_NO_STRIDE(dimSizesRef); ASSERT_NO_STRIDE(lvlSizesRef); ASSERT_NO_STRIDE(lvlTypesRef); - ASSERT_NO_STRIDE(lvl2dimRef); ASSERT_NO_STRIDE(dim2lvlRef); + ASSERT_NO_STRIDE(lvl2dimRef); const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef); const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef); - ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvlTypesRef, lvlRank); + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); ASSERT_USIZE_EQ(lvl2dimRef, lvlRank); const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef); const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -403,10 +399,7 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES) #undef IMPL_SPARSECOORDINATES #undef IMPL_GETOVERHEAD -// TODO: while this API design will work for arbitrary dim2lvl mappings, -// we should probably move the `dimCoords`-to-`lvlCoords` computation into -// codegen (since that could enable optimizations to remove the intermediate -// memref). 
+// TODO: use MapRef here for translation of coordinates #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME( \ void *lvlCOO, StridedMemRefType *vref, \ @@ -506,44 +499,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes( aliasIntoMemref(reader.getRank(), dimSizes, *out); } -#define IMPL_GETNEXT(VNAME, V) \ - void _mlir_ciface_getSparseTensorReaderNext##VNAME( \ - void *p, StridedMemRefType *dimCoordsRef, \ - StridedMemRefType *vref) { \ - assert(p &&vref); \ - auto &reader = *static_cast(p); \ - ASSERT_NO_STRIDE(dimCoordsRef); \ - const uint64_t dimRank = MEMREF_GET_USIZE(dimCoordsRef); \ - index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef); \ - V *value = MEMREF_GET_PAYLOAD(vref); \ - *value = reader.readElement(dimRank, dimCoords); \ - } -MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) -#undef IMPL_GETNEXT - #define IMPL_GETNEXT(VNAME, V, CNAME, C) \ bool _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ + StridedMemRefType *lvl2dimRef, \ StridedMemRefType *cref, StridedMemRefType *vref) { \ assert(p); \ auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + ASSERT_NO_STRIDE(lvl2dimRef); \ ASSERT_NO_STRIDE(cref); \ ASSERT_NO_STRIDE(vref); \ - ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t dimRank = reader.getRank(); \ + const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \ const uint64_t cSize = MEMREF_GET_USIZE(cref); \ const uint64_t vSize = MEMREF_GET_USIZE(vref); \ - const uint64_t lvlRank = reader.getRank(); \ - assert(vSize *lvlRank <= cSize); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ + assert(cSize >= lvlRank * vSize); \ assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \ - ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \ + (void)dimRank; \ (void)cSize; \ (void)vSize; \ - (void)lvlRank; \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); \ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \ V *values = MEMREF_GET_PAYLOAD(vref); \ - index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - return reader.readToBuffers(lvlRank, dim2lvl, lvlCoordinates, \ - values); \ + return reader.readToBuffers(lvlRank, dim2lvl, lvl2dim, \ + lvlCoordinates, values); \ } MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) #undef IMPL_GETNEXT @@ -551,8 +533,8 @@ MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp) { assert(p); SparseTensorReader &reader = *static_cast(p); @@ -568,13 +550,13 @@ void *_mlir_ciface_newSparseTensorFromReader( (void)dimRank; const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef); const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef); - const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); + const index_type *lvl2dim = MEMREF_GET_PAYLOAD(lvl2dimRef); #define CASE(p, c, v, P, C, V) \ if (posTp == OverheadType::p && crdTp == OverheadType::c && \ valTp == PrimaryType::v) \ return static_cast(reader.readSparseTensor( \ - lvlRank, lvlSizes, lvlTypes, lvl2dim, dim2lvl)); + lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim)); #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) // Rewrite kIndex to kU64, to avoid introducing a 
bunch of new cases. // This is safe because of the static_assert above. diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 4ac768c21aff8fc..ff523e70bfc914a 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref // CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( +// 
CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> @@ -665,90 +665,94 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK-LABEL: func.func @sparse_new_coo( // CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant false -// CHECK-DAG: %[[A2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[D0]][%[[A3]]] : memref<2xindex -// CHECK: memref.store %[[A3]], %[[D0]][%[[A2]]] : memref<2xindex> -// CHECK: %[[A5:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A3]]] : memref -// CHECK: %[[A9:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A10:.*]] = call @getSparseTensorReaderNSE(%[[A5]]) -// CHECK: %[[A11:.*]] = arith.muli %[[A10]], %[[A4]] : index -// CHECK: %[[A12:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A13:.*]] = memref.cast %[[A12]] : memref<2xindex> to memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A11]]) : memref -// CHECK: %[[A15:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A16:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.set %[[A16]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A19:.*]] = sparse_tensor.storage_specifier.get %[[A18]] pos_mem_sz at 0 -// CHECK: %[[A21:.*]], %[[A22:.*]] = sparse_tensor.push_back %[[A19]], %[[A13]], %[[A3]] -// CHECK: %[[A24:.*]] = sparse_tensor.storage_specifier.set %[[A18]] pos_mem_sz at 0 with %[[A22]] -// CHECK: %[[A26:.*]] = sparse_tensor.storage_specifier.set %[[A24]] lvl_sz at 1 with %[[A9]] -// CHECK: %[[A27:.*]], %[[A28:.*]] = sparse_tensor.push_back %[[A22]], %[[A21]], %[[A3]], %[[A2]] -// CHECK: %[[A30:.*]] = sparse_tensor.storage_specifier.set %[[A26]] pos_mem_sz at 0 with %[[A28]] -// CHECK: %[[A31:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A32:.*]] = memref.cast %[[A31]] : memref<2xindex> to memref -// CHECK: memref.store %[[A3]], %[[A31]]{{\[}}%[[A3]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A31]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: %[[A33:.*]] = call 
@getSparseTensorReaderReadToBuffers0F32(%[[A5]], %[[A32]], %[[A14]], %[[A15]]) -// CHECK: %[[A34:.*]] = arith.cmpi eq, %[[A33]], %[[A1]] : i1 -// CHECK: scf.if %[[A34]] { -// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A10]], %[[A14]] jointly %[[A15]] {ny = 0 : index, perm_map = #{{.*}}} : memref jointly memref -// CHECK: } -// CHECK: memref.store %[[A10]], %[[A27]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A30]] crd_mem_sz at 0 with %[[A11]] -// CHECK: %[[A38:.*]] = sparse_tensor.storage_specifier.set %[[A36]] val_mem_sz with %[[A10]] -// CHECK: call @delSparseTensorReader(%[[A5]]) : (!llvm.ptr) -> () -// CHECK: return %[[A27]], %[[A14]], %[[A15]], %[[A38]] +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 +// CHECK: scf.if %[[VAL_31]] { +// CHECK: 
sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: } +// CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { -// CHECK-DAG: %[[A1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[A2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[A3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 -// CHECK: %[[D0:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[D1:.*]] = memref.cast %[[D0]] : memref<2xindex> to memref -// CHECK: memref.store %[[A2]], %[[D0]][%[[A2]]] : memref<2xindex -// CHECK: memref.store %[[A2]], %[[D0]][%[[A1]]] : memref<2xindex> -// CHECK: %[[A4:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[D1]], %[[C2]]) -// CHECK: %[[D2:.*]] = call @getSparseTensorReaderDimSizes(%0) : (!llvm.ptr) -> memref -// CHECK: %[[A7:.*]] = memref.load %[[D2]]{{\[}}%[[A2]]] : memref -// CHECK: %[[A8:.*]] = memref.load %[[D2]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A9:.*]] = call @getSparseTensorReaderNSE(%[[A4]]) -// CHECK: %[[A10:.*]] = arith.muli %[[A9]], %[[A3]] : index -// CHECK: %[[A11:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[A12:.*]] = memref.cast %[[A11]] : memref<2xindex> to memref -// CHECK: %[[A13:.*]] = memref.alloc(%[[A10]]) : memref -// CHECK: %[[A14:.*]] = memref.alloc(%[[A9]]) : memref -// CHECK: %[[A15:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[A17:.*]] = sparse_tensor.storage_specifier.set %[[A15]] lvl_sz at 0 with %[[A8]] -// CHECK: %[[A18:.*]] = sparse_tensor.storage_specifier.get %[[A17]] pos_mem_sz at 0 -// CHECK: %[[A20:.*]], %[[A21:.*]] = sparse_tensor.push_back %[[A18]], %[[A12]], %[[A2]] -// CHECK: %[[A23:.*]] = sparse_tensor.storage_specifier.set %[[A17]] pos_mem_sz at 0 with %[[A21]] -// CHECK: %[[A25:.*]] = sparse_tensor.storage_specifier.set %[[A23]] lvl_sz at 1 with %[[A7]] -// CHECK: %[[A26:.*]], %[[A27:.*]] = sparse_tensor.push_back %[[A21]], %[[A20]], %[[A2]], %[[A1]] -// CHECK: %[[A29:.*]] = sparse_tensor.storage_specifier.set %[[A25]] pos_mem_sz at 0 with %[[A27]] -// CHECK: %[[A30:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[A31:.*]] = memref.cast %[[A30]] : memref<2xindex> to memref -// CHECK: memref.store %[[A1]], %[[A30]]{{\[}}%[[A2]]] : memref<2xindex> -// CHECK: memref.store %[[A2]], %[[A30]]{{\[}}%[[A1]]] : memref<2xindex> -// CHECK: %[[A32:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[A4]], %[[A31]], %[[A13]], %[[A14]]) -// CHECK: memref.store %[[A9]], %[[A26]]{{\[}}%[[A1]]] : memref -// CHECK: %[[A34:.*]] = sparse_tensor.storage_specifier.set %[[A29]] crd_mem_sz at 0 with %[[A10]] -// CHECK: %[[A36:.*]] = sparse_tensor.storage_specifier.set %[[A34]] val_mem_sz with %[[A9]] -// CHECK: call @delSparseTensorReader(%[[A4]]) : (!llvm.ptr) -> () -// CHECK: return %[[A26]], %[[A13]], 
%[[A14]], %[[A36]] +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] +// CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] func.func 
@sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 9d337b929fa423a..138736e26c1dfdd 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -114,15 +114,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) // CHECK: %[[DimSizes:.*]] = call @getSparseTensorReaderDimSizes(%[[Reader]]) -// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref -// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref -// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref -// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK: %[[T:.*]] = call @newSparseTensorFromReader(%[[Reader]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}) // CHECK: call @delSparseTensorReader(%[[Reader]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { >From 294e87dbc9ed042293201ff53a02de0a49984e40 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:14:22 -0700 Subject: [PATCH 06/15] fix merge conflict --- mlir/test/Dialect/SparseTensor/codegen.mlir | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index ff523e70bfc914a..adefceba7379f99 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -423,7 +423,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: %[[A9:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[A10:.*]] = arith.constant 1 : index // CHECK-DAG: %[[A11:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A12:.*]]:4 = scf.for %[[A13:.*]] = %[[A11]] to %[[A7]] step %[[A10]] iter_args(%[[A14:.*]] = %[[A0]], %[[A15:.*]] = %[[A1]], %[[A16:.*]] = %[[A2]], %[[A17:.*]] = %[[A3]]) // CHECK: %[[A18:.*]] = memref.load %[[A6]]{{\[}}%[[A13]]] : memref 
// CHECK: %[[A19:.*]] = memref.load %[[A4]]{{\[}}%[[A18]]] : memref @@ -471,7 +471,7 @@ func.func @sparse_compression_1d(%tensor: tensor<100xf64, #SV>, // CHECK: %[[A11:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A12:.*]] = arith.constant 1 : index // CHECK: %[[A13:.*]] = arith.constant 0 : index -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[A7]], %[[A6]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[A7]], %[[A6]] // CHECK: %[[A14:.*]]:4 = scf.for %[[A15:.*]] = %[[A13]] to %[[A7]] step %[[A12]] iter_args(%[[A16:.*]] = %[[A0]], %[[A17:.*]] = %[[A1]], %[[A18:.*]] = %[[A2]], %[[A19:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A20:.*]] = memref.load %[[A6]]{{\[}}%[[A15]]] : memref // CHECK: %[[A21:.*]] = memref.load %[[A4]]{{\[}}%[[A20]]] : memref @@ -507,7 +507,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, return %1 : tensor<8x8xf64, #CSR> } -// CHECK-LABEL: func.func private @_insert_dense_compressed_no_8_8_f64_0_0( +// CHECK-LABEL: func.func private @_insert_dense_compressed_nonordered_8_8_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -533,7 +533,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK: %[[A13:.*]]:4 = scf.for %[[A14:.*]] = %[[A11]] to %[[A7]] step %[[A12]] iter_args(%[[A15:.*]] = %[[A0]], %[[A16:.*]] = %[[A1]], %[[A17:.*]] = %[[A2]], %[[A18:.*]] = %[[A3]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[A19:.*]] = memref.load %[[A6]]{{\[}}%[[A14]]] : memref // CHECK: %[[A20:.*]] = memref.load %[[A4]]{{\[}}%[[A19]]] : memref -// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_no_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier +// CHECK: %[[A21:.*]]:4 = func.call @_insert_dense_compressed_nonordered_8_8_f64_0_0(%[[A15]], %[[A16]], %[[A17]], %[[A18]], %[[A8]], %[[A19]], %[[A20]]) : (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: memref.store %[[A10]], %[[A4]]{{\[}}%[[A19]]] : memref // CHECK: memref.store %[[A9]], %[[A5]]{{\[}}%[[A19]]] : memref // CHECK: scf.yield %[[A21]]#0, %[[A21]]#1, %[[A21]]#2, %[[A21]]#3 : memref, memref, memref, !sparse_tensor.storage_specifier @@ -611,7 +611,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind return %1 : tensor<128xf64, #SparseVector> } -// CHECK-LABEL: func.func private @_insert_compressed_nu_singleton_5_6_f64_0_0( +// CHECK-LABEL: func.func private @_insert_compressed_nonunique_singleton_5_6_f64_0_0( // CHECK-SAME: %[[A1:.*0]]: memref, // CHECK-SAME: %[[A2:.*1]]: memref, // CHECK-SAME: %[[A3:.*2]]: memref, @@ -627,7 +627,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind // CHECK-SAME: %[[A3:.*3]]: !sparse_tensor.storage_specifier // CHECK-SAME: %[[A4:.*4]]: index, // CHECK-SAME: %[[A5:.*5]]: f64) -// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nu_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) +// CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]]) // CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> { %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo> >From 
1ad75e4ae4eaea1429a39e37d556b3ca86a6c041 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:17:46 -0700 Subject: [PATCH 07/15] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index 1c155568802e579..a1bd6798f150b43 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < dimRank; ++i) @@ -58,7 +59,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { switch (kind) { case MapKind::kIdentity: for (uint64_t i = 0; i < lvlRank; ++i) >From 67647435de28994a5b7f9d37d2c5f02fe7a917d9 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 15:57:59 -0700 Subject: [PATCH 08/15] clang=format --- mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 37ad3c1b042313c..0dd23ac52ac6790 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -229,7 +229,6 @@ class SparseTensorStorageBase { const std::vector lvl2dim; }; - /// A memory-resident sparse tensor using a storage scheme based on /// per-level sparse/dense annotations. This data structure provides /// a bufferized form of a sparse tensor type. 
In contrast to generating @@ -780,8 +779,7 @@ class SparseTensorEnumeratorBase { //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final - : public SparseTensorEnumeratorBase { +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; >From 493a7318473122e42e6d9a03f895df8eb74039ef Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 19:55:25 -0700 Subject: [PATCH 09/15] ArrayRef --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index ffb1a550957edb8..61fecdad3be9398 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -776,7 +776,7 @@ Value sparse_tensor::genReader(OpBuilder &builder, Location loc, Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapesValues, + ArrayRef dimShapesValues, Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 08ea019d8224a73..698b6c491a9aef7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -355,8 +355,8 @@ Value genReader(OpBuilder &builder, Location loc, SparseTensorType stt, /// Generates code to set up the buffer parameters for a reader.
Value genReaderBuffers(OpBuilder &builder, Location loc, SparseTensorType stt, - SmallVectorImpl &dimShapeValues, - Value dimSizesBuffer, /*out*/ Value &dim2lvlBuffer, + ArrayRef dimShapeValues, Value dimSizesBuffer, + /*out*/ Value &dim2lvlBuffer, /*out*/ Value &lvl2dimBuffer); //===----------------------------------------------------------------------===// >From 3e13b908253c1873295fb263537eee3bd40f186e Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 5 Oct 2023 21:08:24 -0700 Subject: [PATCH 10/15] sort_coo -> sort --- mlir/test/Dialect/SparseTensor/codegen.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index adefceba7379f99..84904227a636327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -699,7 +699,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: sparse_tensor.sort_coo hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] +// CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] // CHECK: } // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] >From e562d1ca2297ec907c719b089ce77ea7f91a28a3 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 09:48:36 -0700 Subject: [PATCH 11/15] changed header protos --- mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h index 861b7eff65115b6..f25df11d15fdad1 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -134,8 +134,8 @@ MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_createCheckedSparseTensorReader( MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, - StridedMemRefType *lvl2dimRef, - StridedMemRefType *dim2lvlRef, OverheadType posTp, + StridedMemRefType *dim2lvlRef, + StridedMemRefType *lvl2dimRef, OverheadType posTp, OverheadType crdTp, PrimaryType valTp); /// SparseTensorReader method to obtain direct access to the @@ -149,7 +149,8 @@ MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_getSparseTensorReaderDimSizes( MLIR_CRUNNERUTILS_EXPORT bool \ _mlir_ciface_getSparseTensorReaderReadToBuffers##CNAME##VNAME( \ void *p, StridedMemRefType *dim2lvlRef, \ - StridedMemRefType *iref, StridedMemRefType *vref) \ + StridedMemRefType *lvl2dimRef, \ + StridedMemRefType *cref, StridedMemRefType *vref) \ MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) #undef DECL_GETNEXT >From 0dda2e88c0760a29a099897b1a0751513f510959 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:24:59 -0700 Subject: [PATCH 12/15] simpler MapRef --- .../ExecutionEngine/SparseTensor/MapRef.h | 53 ++++++------------- .../ExecutionEngine/SparseTensor/MapRef.cpp | 28 ++-------- 2 files changed, 21 insertions(+), 60 deletions(-) diff --git 
a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index a1bd6798f150b43..e63412498a1abb8 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -16,6 +16,7 @@ #include #include +#include namespace mlir { namespace sparse_tensor { @@ -23,12 +24,11 @@ namespace sparse_tensor { /// A class for capturing the sparse tensor type map with a compact encoding. /// /// Currently, the following situations are supported: -/// (1) map is an identity -/// (2) map is a permutation -/// (3) map has affine ops (restricted set) +/// (1) map is a permutation +/// (2) map has affine ops (restricted set) /// -/// The pushforward/backward operations are fast for (1) and (2) but -/// incur some obvious overhead for situation (3). +/// The pushforward/backward operations are fast for (1) but incur some obvious +/// overhead for situation (2). /// class MapRef final { public: @@ -38,20 +38,12 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template - inline void pushforward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < dimRank; ++i) - out[i] = in[i]; // TODO: optimize with in == out ? - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < dimRank; ++i) - out[dim2lvl[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushforward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < lvlRank; ++i) + out[i] = in[lvl2dim[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -59,20 +51,12 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template - inline void pushbackward(const T *in, T *out) const { - switch (kind) { - case MapKind::kIdentity: - for (uint64_t i = 0; i < lvlRank; ++i) - out[i] = in[i]; - break; - case MapKind::kPermutation: - for (uint64_t i = 0; i < lvlRank; ++i) - out[lvl2dim[i]] = in[i]; - break; - case MapKind::kAffine: + template inline void pushbackward(const T *in, T *out) const { + if (isPermutation) { + for (uint64_t i = 0; i < dimRank; ++i) + out[i] = in[dim2lvl[i]]; + } else { assert(0 && "coming soon"); - break; } } @@ -80,16 +64,13 @@ class MapRef final { uint64_t getLvlRank() const { return lvlRank; } private: - enum class MapKind { kIdentity, kPermutation, kAffine }; - - bool isIdentity() const; - bool isPermutation() const; + bool isPermutationMap() const; - MapKind kind; const uint64_t dimRank; const uint64_t lvlRank; const uint64_t *const dim2lvl; // non-owning pointer const uint64_t *const lvl2dim; // non-owning pointer + const bool isPermutation; }; } // namespace sparse_tensor diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp index ed458afeae746bc..ee4d6fa0d34b491 100644 --- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp @@ -6,39 +6,19 @@ // //===----------------------------------------------------------------------===// -#include - #include "mlir/ExecutionEngine/SparseTensor/MapRef.h" mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d) - : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d) { - assert(d2l && l2d); - // Determine the kind of mapping (and asserts on simple inference). 
- if (isIdentity()) { - kind = MapKind::kIdentity; - for (uint64_t i = 0; i < dimRank; i++) - assert(lvl2dim[i] == i); - } else if (isPermutation()) { - kind = MapKind::kPermutation; + : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d), + isPermutation(isPermutationMap()) { + if (isPermutation) { for (uint64_t i = 0; i < dimRank; i++) assert(lvl2dim[dim2lvl[i]] == i); - } else { - kind = MapKind::kAffine; - } -} - -bool mlir::sparse_tensor::MapRef::isIdentity() const { - if (dimRank != lvlRank) - return false; - for (uint64_t i = 0; i < dimRank; i++) { - if (dim2lvl[i] != i) - return false; } - return true; } -bool mlir::sparse_tensor::MapRef::isPermutation() const { +bool mlir::sparse_tensor::MapRef::isPermutationMap() const { if (dimRank != lvlRank) return false; std::vector seen(dimRank, false); >From ea50b3820f0a7817b7add513d6b13292a1768620 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 10:46:18 -0700 Subject: [PATCH 13/15] clang-format --- mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h index e63412498a1abb8..22ae70a61d95eff 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h @@ -38,7 +38,8 @@ class MapRef final { // Push forward maps from dimensions to levels. // - template inline void pushforward(const T *in, T *out) const { + template + inline void pushforward(const T *in, T *out) const { if (isPermutation) { for (uint64_t i = 0; i < lvlRank; ++i) out[i] = in[lvl2dim[i]]; @@ -51,7 +52,8 @@ class MapRef final { // Push backward maps from levels to dimensions. // - template inline void pushbackward(const T *in, T *out) const { + template + inline void pushbackward(const T *in, T *out) const { if (isPermutation) { for (uint64_t i = 0; i < dimRank; ++i) out[i] = in[dim2lvl[i]]; >From 57a73fe7d1a56cefa1f2d8d1bc43517f8d380247 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 11:14:27 -0700 Subject: [PATCH 14/15] typo --- mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 61fecdad3be9398..c44c5985ee7bba1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -782,7 +782,7 @@ Value sparse_tensor::genReaderBuffers(OpBuilder &builder, Location loc, /*out*/ Value &lvl2dimBuffer) { const Dimension dimRank = stt.getDimRank(); const Level lvlRank = stt.getLvlRank(); - // For an identify mapping, the dim2lvl and lvl2dim mappings are + // For an identity mapping, the dim2lvl and lvl2dim mappings are // identical as are dimSizes and lvlSizes, so buffers are reused // as much as possible. 
if (stt.isIdentity()) { >From 194dccc6ed0efe283d374668395d2c6b64c996b2 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 6 Oct 2023 13:36:39 -0700 Subject: [PATCH 15/15] bazel build overlay --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 53b626996f8bbfa..1dfba7de465a5ae 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -8643,6 +8643,7 @@ cc_library( name = "SparseTensorRuntime", srcs = [ "lib/ExecutionEngine/SparseTensor/File.cpp", + "lib/ExecutionEngine/SparseTensor/MapRef.cpp", "lib/ExecutionEngine/SparseTensor/NNZ.cpp", "lib/ExecutionEngine/SparseTensor/Storage.cpp", ], @@ -8651,6 +8652,7 @@ cc_library( "include/mlir/ExecutionEngine/SparseTensor/COO.h", "include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h", "include/mlir/ExecutionEngine/SparseTensor/File.h", + "include/mlir/ExecutionEngine/SparseTensor/MapRef.h", "include/mlir/ExecutionEngine/SparseTensor/Storage.h", ], includes = ["include"], From lldb-commits at lists.llvm.org Fri Oct 6 13:42:14 2023 From: lldb-commits at lists.llvm.org (Aart Bik via lldb-commits) Date: Fri, 06 Oct 2023 13:42:14 -0700 (PDT) Subject: [Lldb-commits] [lldb] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360) In-Reply-To: Message-ID: <65207126.170a0220.65d64.348d@mx.google.com> https://github.com/aartbik closed https://github.com/llvm/llvm-project/pull/68360 From lldb-commits at lists.llvm.org Fri Oct 6 14:17:34 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 14:17:34 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) Message-ID: https://github.com/PortalPete created https://github.com/llvm/llvm-project/pull/68452 The `po` alias now matches the behavior of the `expression` command when it can apply a Fix-It to an expression. Modifications - Add `m_fixed_expression` to the `CommandObjectDWIMPrint` class as a `protected` member that stores the post-Fix-It expression, just like the `CommandObjectExpression` class. - Converted messages to present tense. - Add test cases that confirm a Fix-It for a C++ expression for both `po` and `expression` rdar://115317419 >From 1d0ac08d38a33ae70687f7b125367c39fbcf92f3 Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Thu, 5 Oct 2023 14:22:35 -1000 Subject: [PATCH] Modify `po` alias to match outward FixIt behavior with `expression`. - Fix `po` alias so that it prints out a message when applying a FixIt, just like the `expression` command. - Add test cases for applying a FixIt with both `expression` command and `po` alias. - Reword console messages for readability.
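For reference, the same Fix-It machinery is also reachable from LLDB's C++ SB API. A minimal standalone sketch (not part of this patch; it assumes a debuggable `a.out` exists and reuses the ill-formed lambda expression from the new tests):

#include "lldb/API/LLDB.h"
#include <cstdio>

int main() {
  lldb::SBDebugger::Initialize();
  lldb::SBDebugger dbg = lldb::SBDebugger::Create();
  lldb::SBTarget target = dbg.CreateTarget("a.out");

  // Ask the evaluator to retry the expression with Clang Fix-Its applied,
  // which is the behavior `expression` (and now `po`) reports in the console.
  lldb::SBExpressionOptions opts;
  opts.SetAutoApplyFixIts(true);

  // Ill-formed on purpose: the lambda needs an explicit `this` capture,
  // which Clang offers as a Fix-It.
  lldb::SBValue v = target.EvaluateExpression(
      "class C { int i; void f() { []() { ++i; }(); } }", opts);
  if (v.GetError().Fail())
    fprintf(stderr, "%s\n", v.GetError().GetCString());

  lldb::SBDebugger::Destroy(dbg);
  lldb::SBDebugger::Terminate();
  return 0;
}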
--- .../Commands/CommandObjectDWIMPrint.cpp | 11 +++++++- lldb/source/Commands/CommandObjectDWIMPrint.h | 4 +++ .../Commands/CommandObjectExpression.cpp | 6 ++--- .../source/Commands/CommandObjectExpression.h | 4 ++- .../API/lang/cpp/dwim-print-fixit/Makefile | 3 +++ .../dwim-print-fixit/TestCppDWIMPrintFixIt.py | 27 +++++++++++++++++++ .../API/lang/cpp/dwim-print-fixit/main.cpp | 6 +++++ .../API/lang/cpp/expression-fixit/Makefile | 3 +++ .../TestCppExpressionFixIt.py | 26 ++++++++++++++++++ .../API/lang/cpp/expression-fixit/main.cpp | 6 +++++ 10 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp create mode 100644 lldb/test/API/lang/cpp/expression-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py create mode 100644 lldb/test/API/lang/cpp/expression-fixit/main.cpp diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 7b168eab9e02d44..8a5be3e1cd1c36e 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", + m_fixed_expression.c_str()); + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.h b/lldb/source/Commands/CommandObjectDWIMPrint.h index 3fc6c01d4729707..b4e68495b17af07 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.h +++ b/lldb/source/Commands/CommandObjectDWIMPrint.h @@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { HandleArgumentCompletion(CompletionRequest &request, OptionElementVector &opt_element_vector) override; +protected: + /// Stores the expression after applying any Fix-Its. + std::string m_fixed_expression; + private: bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b99133..82283d0fe6a0935 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,10 +439,10 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). 
if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", + error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", m_fixed_expression.c_str()); } diff --git a/lldb/source/Commands/CommandObjectExpression.h b/lldb/source/Commands/CommandObjectExpression.h index b2b8fc73a1ee831..276da559fb4dbd6 100644 --- a/lldb/source/Commands/CommandObjectExpression.h +++ b/lldb/source/Commands/CommandObjectExpression.h @@ -99,7 +99,9 @@ class CommandObjectExpression : public CommandObjectRaw, CommandOptions m_command_options; uint32_t m_expr_line_count; std::string m_expr_lines; // Multi-line expression support - std::string m_fixed_expression; // Holds the current expression's fixed text. + + /// Stores the expression after applying any Fix-Its. + std::string m_fixed_expression; }; } // namespace lldb_private diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py new file mode 100644 index 000000000000000..ad92d0216126965 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." 
+ + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "po class C { int i; void f() { []() { ++i; }(); } }", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp new file mode 100644 index 000000000000000..3ead9e4957a632f --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -0,0 +1,6 @@ +int main() +{ + long foo = 1234; + + return 0; // break here +} diff --git a/lldb/test/API/lang/cpp/expression-fixit/Makefile b/lldb/test/API/lang/cpp/expression-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py new file mode 100644 index 000000000000000..7517d911723d851 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_Expression_FixIt_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr -l c++ -- class C { int i; void f() { []() { ++i; }(); } }", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp new file mode 100644 index 000000000000000..3ead9e4957a632f --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp @@ -0,0 +1,6 @@ +int main() +{ + long foo = 1234; + + return 0; // break here +} From lldb-commits at lists.llvm.org Fri Oct 6 14:18:55 2023 From: lldb-commits at lists.llvm.org (Artem Belevich via lldb-commits) Date: Fri, 06 Oct 2023 14:18:55 -0700 (PDT) Subject: [Lldb-commits] [lldb] [NVPTX] Improve lowering of v4i8 (PR #67866) In-Reply-To: Message-ID: <652079bf.620a0220.1aff.6593@mx.google.com> https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/67866 >From 4771c973c4659b814eacbacc23bd3c6c877ce2da Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 29 Sep 2023 14:37:46 -0700 Subject: [PATCH 1/8] [NVPTX] Improve lowering of v4i8 Make it a legal type and plumb through lowering of relevant instructions. 
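The practical upshot is that a <4 x i8> value now travels in a single .b32 register, so, for example, a constant vector can be materialized as one mov.b32 immediate instead of four byte moves. A standalone C++ sketch of the byte packing this implies (illustration only; `packV4I8` is a hypothetical helper, with lane 0 in the low byte):

#include <cstdint>
#include <cstdio>

// Pack four i8 lanes into one 32-bit value, lane 0 in the low byte,
// mirroring the shift/or packing used when a constant v4i8 build_vector
// is lowered to a single b32 immediate.
static uint32_t packV4I8(uint8_t e0, uint8_t e1, uint8_t e2, uint8_t e3) {
  return uint32_t(e0) | (uint32_t(e1) << 8) | (uint32_t(e2) << 16) |
         (uint32_t(e3) << 24);
}

int main() {
  // <4 x i8> <1, 2, 3, 4> becomes the single immediate 0x04030201.
  printf("0x%08x\n", packV4I8(1, 2, 3, 4));
  return 0;
}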
--- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 20 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 20 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 52 +- llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td | 2 +- .../NVPTX/load-with-non-coherent-cache.ll | 4 +- llvm/test/CodeGen/NVPTX/param-load-store.ll | 26 +- ...unfold-masked-merge-vector-variablemask.ll | 518 ++++-------------- llvm/test/CodeGen/NVPTX/vec8.ll | 5 +- 8 files changed, 177 insertions(+), 470 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0aef2591c6e2394..1daa4971981c25c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -829,6 +829,7 @@ pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, case MVT::v2f16: case MVT::v2bf16: case MVT::v2i16: + case MVT::v4i8: return Opcode_i32; case MVT::f32: return Opcode_f32; @@ -910,7 +911,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { // Vector Setting unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; if (SimpleVT.isVector()) { - assert(Isv2x16VT(LoadedVT) && "Unexpected vector type"); + assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) && + "Unexpected vector type"); // v2f16/v2bf16/v2i16 is loaded using ld.b32 fromTypeWidth = 32; } @@ -1254,6 +1256,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { SDLoc DL(N); SDNode *LD; SDValue Base, Offset, Addr; + EVT OrigType = N->getValueType(0); EVT EltVT = Mem->getMemoryVT(); unsigned NumElts = 1; @@ -1261,12 +1264,15 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NumElts = EltVT.getVectorNumElements(); EltVT = EltVT.getVectorElementType(); // vectors of 16bits type are loaded/stored as multiples of v2x16 elements. - if ((EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) || - (EltVT == MVT::bf16 && N->getValueType(0) == MVT::v2bf16) || - (EltVT == MVT::i16 && N->getValueType(0) == MVT::v2i16)) { + if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) || + (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) || + (EltVT == MVT::i16 && OrigType == MVT::v2i16)) { assert(NumElts % 2 == 0 && "Vector must have even number of elements"); - EltVT = N->getValueType(0); + EltVT = OrigType; NumElts /= 2; + } else if (OrigType == MVT::v4i8) { + EltVT = OrigType; + NumElts = 1; } } @@ -1601,7 +1607,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { // concept of sign-/zero-extension, so emulate it here by adding an explicit // CVT instruction. Ptxas should clean up any redundancies here. 
- EVT OrigType = N->getValueType(0); LoadSDNode *LdNode = dyn_cast(N); if (OrigType != EltVT && @@ -1679,7 +1684,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { MVT ScalarVT = SimpleVT.getScalarType(); unsigned toTypeWidth = ScalarVT.getSizeInBits(); if (SimpleVT.isVector()) { - assert(Isv2x16VT(StoreVT) && "Unexpected vector type"); + assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) && + "Unexpected vector type"); // v2x16 is stored using st.b32 toTypeWidth = 32; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b24aae4792ce6a6..7880d70fb2c6fea 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -221,6 +221,11 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, llvm_unreachable("Unexpected type"); } NumElts /= 2; + } else if (EltVT.getSimpleVT() == MVT::i8 && + (NumElts % 4 == 0 || NumElts == 3)) { + // v*i8 are formally lowered as v4i8 + EltVT = MVT::v4i8; + NumElts = (NumElts + 3) / 4; } for (unsigned j = 0; j != NumElts; ++j) { ValueVTs.push_back(EltVT); @@ -458,6 +463,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass); addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); @@ -2631,7 +2637,7 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return expandUnalignedStore(Store, DAG); // v2f16, v2bf16 and v2i16 don't need special handling. - if (Isv2x16VT(VT)) + if (Isv2x16VT(VT) || VT == MVT::v4i8) return SDValue(); if (VT.isVector()) @@ -2903,7 +2909,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( EVT LoadVT = EltVT; if (EltVT == MVT::i1) LoadVT = MVT::i8; - else if (Isv2x16VT(EltVT)) + else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8) // getLoad needs a vector type, but it can't handle // vectors which contain v2f16 or v2bf16 elements. So we must load // using i32 here and then bitcast back. @@ -2929,7 +2935,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (EltVT == MVT::i1) Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); // v2f16 was loaded as an i32. Now we must bitcast it back. - else if (Isv2x16VT(EltVT)) + else if (EltVT != LoadVT) Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt); // If a promoted integer type is used, truncate down to the original @@ -5256,9 +5262,9 @@ static SDValue PerformEXTRACTCombine(SDNode *N, SDValue Vector = N->getOperand(0); EVT VectorVT = Vector.getValueType(); if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && - IsPTXVectorType(VectorVT.getSimpleVT())) + IsPTXVectorType(VectorVT.getSimpleVT()) && VectorVT != MVT::v4i8) return SDValue(); // Native vector loads already combine nicely w/ - // extract_vector_elt. + // extract_vector_elt, except for v4i8. // Don't mess with singletons or v2*16 types, we already handle them OK. if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT)) return SDValue(); @@ -5289,6 +5295,10 @@ static SDValue PerformEXTRACTCombine(SDNode *N, // If element has non-integer type, bitcast it back to the expected type. 
if (EltVT != EltIVT) Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result); + // Past legalizer, we may need to extent i8 -> i16 to match the register type. + if (EltVT != N->getValueType(0)) + Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result); + return Result; } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 28c4cadb303ad4f..047161fb2027dee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1486,23 +1486,24 @@ defm OR : BITWISE<"or", or>; defm AND : BITWISE<"and", and>; defm XOR : BITWISE<"xor", xor>; -// Lower logical v2i16 ops as bitwise ops on b32. -def: Pat<(or (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)), - (ORb32rr Int32Regs:$a, Int32Regs:$b)>; -def: Pat<(xor (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)), - (XORb32rr Int32Regs:$a, Int32Regs:$b)>; -def: Pat<(and (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)), - (ANDb32rr Int32Regs:$a, Int32Regs:$b)>; - -// The constants get legalized into a bitcast from i32, so that's what we need -// to match here. -def: Pat<(or Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))), - (ORb32ri Int32Regs:$a, imm:$b)>; -def: Pat<(xor Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))), - (XORb32ri Int32Regs:$a, imm:$b)>; -def: Pat<(and Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))), - (ANDb32ri Int32Regs:$a, imm:$b)>; - +// Lower logical v2i16/v4i8 ops as bitwise ops on b32. +foreach vt = [v2i16, v4i8] in { + def: Pat<(or (vt Int32Regs:$a), (vt Int32Regs:$b)), + (ORb32rr Int32Regs:$a, Int32Regs:$b)>; + def: Pat<(xor (vt Int32Regs:$a), (vt Int32Regs:$b)), + (XORb32rr Int32Regs:$a, Int32Regs:$b)>; + def: Pat<(and (vt Int32Regs:$a), (vt Int32Regs:$b)), + (ANDb32rr Int32Regs:$a, Int32Regs:$b)>; + + // The constants get legalized into a bitcast from i32, so that's what we need + // to match here. 
+ def: Pat<(or Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + (ORb32ri Int32Regs:$a, imm:$b)>; + def: Pat<(xor Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + (XORb32ri Int32Regs:$a, imm:$b)>; + def: Pat<(and Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + (ANDb32ri Int32Regs:$a, imm:$b)>; +} def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), "not.pred \t$dst, $src;", @@ -2682,7 +2683,7 @@ foreach vt = [f16, bf16] in { def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>; } -foreach vt = [v2f16, v2bf16, v2i16] in { +foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>; } @@ -2995,8 +2996,8 @@ def: Pat<(i16 (bitconvert (vt Int16Regs:$a))), (ProxyRegI16 Int16Regs:$a)>; } -foreach ta = [v2f16, v2bf16, v2i16, i32] in { - foreach tb = [v2f16, v2bf16, v2i16, i32] in { +foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in { + foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in { if !ne(ta, tb) then { def: Pat<(ta (bitconvert (tb Int32Regs:$a))), (ProxyRegI32 Int32Regs:$a)>; @@ -3292,6 +3293,10 @@ let hasSideEffects = false in { (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4), "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; + def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b32 \t$d, {{$s1, $s2, $s3, $s4}};", []>; def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2), "mov.b32 \t$d, {{$s1, $s2}};", []>; @@ -3307,6 +3312,10 @@ let hasSideEffects = false in { Int16Regs:$d3, Int16Regs:$d4), (ins Int64Regs:$s), "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; + def I32toV4I8 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, + Int16Regs:$d3, Int16Regs:$d4), + (ins Int32Regs:$s), + "mov.b32 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), (ins Int32Regs:$s), "mov.b32 \t{{$d1, $d2}}, $s;", []>; @@ -3354,6 +3363,9 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; +def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), + (i16 Int16Regs:$c), (i16 Int16Regs:$d))), + (V4I8toI32 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c, Int16Regs:$d)>; // Count leading zeros let hasSideEffects = false in { diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td index ed9dabf39dd7ad9..b5231a9cf67f93a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -58,7 +58,7 @@ foreach i = 0...31 in { //===----------------------------------------------------------------------===// def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; -def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16], 32, +def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll index 9012339fb6b1e20..98ab93774588d28 100644 --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ 
b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -130,9 +130,9 @@ define void @foo12(ptr noalias readonly %from, ptr %to) { } ; SM20-LABEL: .visible .entry foo13( -; SM20: ld.global.v4.u8 +; SM20: ld.global.u32 ; SM35-LABEL: .visible .entry foo13( -; SM35: ld.global.nc.v4.u8 +; SM35: ld.global.nc.u32 define void @foo13(ptr noalias readonly %from, ptr %to) { %1 = load <4 x i8>, ptr %from store <4 x i8> %1, ptr %to diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 2d87271e30ae0b7..b4208c691c91dfa 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -212,18 +212,16 @@ define signext i8 @test_i8s(i8 signext %a) { ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) ; CHECK-LABEL: test_v3i8( ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4] -; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2]; -; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0]; +; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v3i8_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]}; -; CHECK: st.param.b8 [param0+2], [[E2]]; +; CHECK: st.param.b32 [param0+0], [[R]] ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i8, -; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; -; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]}; -; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]]; +; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0+0]; +; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very +; interesting here, so it's skipped. 
+; CHECK: st.param.b32 [func_retval0+0], ; CHECK-NEXT: ret; define <3 x i8> @test_v3i8(<3 x i8> %a) { %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a); @@ -233,14 +231,14 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) { ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) ; CHECK-LABEL: test_v4i8( ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4] -; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0] +; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0] ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK: st.param.b32 [param0+0], [[R]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v4i8, -; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; -; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0+0]; +; CHECK: st.param.b32 [func_retval0+0], [[RET]]; ; CHECK-NEXT: ret; define <4 x i8> @test_v4i8(<4 x i8> %a) { %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a); @@ -250,10 +248,10 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) { ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) ; CHECK-LABEL: test_v5i8( ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8] +; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_v5i8_param_0] ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4]; -; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0] ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK-DAG: st.param.v4.b8 [param0+0], ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll index 16579de882ed4b6..74087be4834d966 100644 --- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll @@ -60,46 +60,20 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot2[4]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b32 %r<21>; +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<11>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot2; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v4i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v4i8_param_2]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: and.b32 %r4, %r2, %r3; -; CHECK-NEXT: mov.b32 %r5, {%rs7, %rs8}; -; CHECK-NEXT: and.b32 %r6, %r1, %r5; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v4i8_param_1]; -; CHECK-NEXT: mov.b32 %r7, {%rs11, %rs12}; -; CHECK-NEXT: mov.b32 %r8, {%rs9, %rs10}; -; CHECK-NEXT: xor.b32 %r9, %r5, 16711935; -; CHECK-NEXT: xor.b32 %r10, %r3, 16711935; -; CHECK-NEXT: and.b32 %r11, %r8, %r10; -; CHECK-NEXT: and.b32 %r12, 
%r7, %r9; -; CHECK-NEXT: or.b32 %r13, %r6, %r12; -; CHECK-NEXT: mov.b32 {%rs13, %rs14}, %r13; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs13, %rs14}; -; CHECK-NEXT: or.b32 %r14, %r4, %r11; -; CHECK-NEXT: mov.b32 {%rs15, %rs16}, %r14; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs15, %rs16}; -; CHECK-NEXT: ld.u16 %r15, [%SP+0]; -; CHECK-NEXT: shl.b32 %r16, %r15, 16; -; CHECK-NEXT: ld.u16 %r17, [%SP+2]; -; CHECK-NEXT: or.b32 %r18, %r17, %r16; -; CHECK-NEXT: shr.u32 %r19, %r18, 8; -; CHECK-NEXT: cvt.u16.u32 %rs17, %r19; -; CHECK-NEXT: cvt.u16.u32 %rs18, %r15; -; CHECK-NEXT: bfe.s32 %r20, %r15, 8, 8; -; CHECK-NEXT: cvt.u16.u32 %rs19, %r20; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs15, %rs17, %rs18, %rs19}; +; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_param_2]; +; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_param_0]; +; CHECK-NEXT: and.b32 %r5, %r4, %r1; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: xor.b32 %r8, %r1, %r7; +; CHECK-NEXT: and.b32 %r9, %r3, %r8; +; CHECK-NEXT: or.b32 %r10, %r5, %r9; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -111,48 +85,20 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8_undef( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot3[4]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<22>; -; CHECK-NEXT: .reg .b32 %r<22>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<11>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot3; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v4i8_undef_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v4i8_undef_param_2]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: and.b32 %r4, %r2, %r3; -; CHECK-NEXT: mov.b32 %r5, {%rs7, %rs8}; -; CHECK-NEXT: and.b32 %r6, %r1, %r5; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v4i8_undef_param_1]; -; CHECK-NEXT: mov.b32 %r7, {%rs11, %rs12}; -; CHECK-NEXT: mov.b32 %r8, {%rs9, %rs10}; -; CHECK-NEXT: mov.u16 %rs13, 255; -; CHECK-NEXT: mov.b32 %r9, {%rs14, %rs13}; -; CHECK-NEXT: xor.b32 %r10, %r5, %r9; -; CHECK-NEXT: xor.b32 %r11, %r3, 16711935; -; CHECK-NEXT: and.b32 %r12, %r8, %r11; -; CHECK-NEXT: and.b32 %r13, %r7, %r10; -; CHECK-NEXT: or.b32 %r14, %r6, %r13; -; CHECK-NEXT: mov.b32 {%rs15, %rs16}, %r14; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs15, %rs16}; -; CHECK-NEXT: or.b32 %r15, %r4, %r12; -; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs17, %rs18}; -; CHECK-NEXT: ld.u16 %r16, [%SP+0]; -; CHECK-NEXT: shl.b32 %r17, %r16, 16; -; CHECK-NEXT: ld.u16 %r18, [%SP+2]; -; CHECK-NEXT: or.b32 %r19, %r18, %r17; -; CHECK-NEXT: shr.u32 %r20, %r19, 8; -; CHECK-NEXT: cvt.u16.u32 %rs19, %r20; -; CHECK-NEXT: cvt.u16.u32 %rs20, %r16; -; CHECK-NEXT: bfe.s32 %r21, %r16, 8, 8; -; CHECK-NEXT: cvt.u16.u32 %rs21, %r21; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs17, %rs19, %rs20, %rs21}; +; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_undef_param_2]; +; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_undef_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_undef_param_0]; +; CHECK-NEXT: and.b32 
%r5, %r4, %r1; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs2, %rs1}; +; CHECK-NEXT: xor.b32 %r8, %r1, %r7; +; CHECK-NEXT: and.b32 %r9, %r3, %r8; +; CHECK-NEXT: or.b32 %r10, %r5, %r9; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -212,84 +158,24 @@ define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwin define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: out_v8i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot6[8]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<40>; -; CHECK-NEXT: .reg .b32 %r<38>; -; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<22>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot6; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v8i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v8i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r3, {%rs7, %rs8}; -; CHECK-NEXT: mov.b32 %r4, {%rs5, %rs6}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v8i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r5, {%rs9, %rs10}; -; CHECK-NEXT: and.b32 %r6, %r4, %r5; -; CHECK-NEXT: mov.b32 %r7, {%rs11, %rs12}; -; CHECK-NEXT: and.b32 %r8, %r3, %r7; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [out_v8i8_param_2]; -; CHECK-NEXT: mov.b32 %r9, {%rs13, %rs14}; -; CHECK-NEXT: and.b32 %r10, %r2, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs15, %rs16}; -; CHECK-NEXT: and.b32 %r12, %r1, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [out_v8i8_param_1]; -; CHECK-NEXT: mov.b32 %r13, {%rs19, %rs20}; -; CHECK-NEXT: mov.b32 %r14, {%rs17, %rs18}; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [out_v8i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r15, {%rs23, %rs24}; -; CHECK-NEXT: mov.b32 %r16, {%rs21, %rs22}; -; CHECK-NEXT: xor.b32 %r17, %r11, 16711935; -; CHECK-NEXT: xor.b32 %r18, %r9, 16711935; -; CHECK-NEXT: xor.b32 %r19, %r7, 16711935; -; CHECK-NEXT: xor.b32 %r20, %r5, 16711935; -; CHECK-NEXT: and.b32 %r21, %r16, %r20; -; CHECK-NEXT: and.b32 %r22, %r15, %r19; -; CHECK-NEXT: and.b32 %r23, %r14, %r18; -; CHECK-NEXT: and.b32 %r24, %r13, %r17; -; CHECK-NEXT: or.b32 %r25, %r12, %r24; -; CHECK-NEXT: mov.b32 {%rs25, %rs26}, %r25; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs25, %rs26}; -; CHECK-NEXT: or.b32 %r26, %r10, %r23; -; CHECK-NEXT: mov.b32 {%rs27, %rs28}, %r26; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs27, %rs28}; -; CHECK-NEXT: or.b32 %r27, %r8, %r22; -; CHECK-NEXT: mov.b32 {%rs29, %rs30}, %r27; -; CHECK-NEXT: st.v2.u8 [%SP+4], {%rs29, %rs30}; -; CHECK-NEXT: or.b32 %r28, %r6, %r21; -; CHECK-NEXT: mov.b32 {%rs31, %rs32}, %r28; -; CHECK-NEXT: st.v2.u8 [%SP+6], {%rs31, %rs32}; -; CHECK-NEXT: ld.u16 %r29, [%SP+0]; -; CHECK-NEXT: shl.b32 %r30, %r29, 16; -; CHECK-NEXT: ld.u16 %r31, [%SP+2]; -; CHECK-NEXT: or.b32 %r32, %r31, %r30; -; CHECK-NEXT: cvt.u64.u32 %rd1, %r32; -; CHECK-NEXT: ld.u16 %r33, [%SP+4]; -; CHECK-NEXT: shl.b32 %r34, %r33, 16; -; CHECK-NEXT: ld.u16 %r35, [%SP+6]; -; CHECK-NEXT: or.b32 %r36, %r35, %r34; -; CHECK-NEXT: cvt.u64.u32 %rd2, %r36; -; CHECK-NEXT: shl.b64 %rd3, %rd2, 32; -; CHECK-NEXT: or.b64 %rd4, %rd1, %rd3; -; CHECK-NEXT: shr.u32 %r37, %r36, 8; -; CHECK-NEXT: shr.u64 %rd5, %rd4, 24; -; 
CHECK-NEXT: cvt.u16.u64 %rs33, %rd5; -; CHECK-NEXT: shr.u64 %rd6, %rd1, 16; -; CHECK-NEXT: cvt.u16.u64 %rs34, %rd6; -; CHECK-NEXT: shr.u64 %rd7, %rd1, 8; -; CHECK-NEXT: cvt.u16.u64 %rs35, %rd7; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs27, %rs35, %rs34, %rs33}; -; CHECK-NEXT: cvt.u16.u32 %rs36, %r37; -; CHECK-NEXT: bfe.s64 %rd8, %rd2, 24, 8; -; CHECK-NEXT: cvt.u16.u64 %rs37, %rd8; -; CHECK-NEXT: cvt.u16.u32 %rs38, %r33; -; CHECK-NEXT: cvt.u16.u32 %rs39, %r35; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs39, %rs36, %rs38, %rs37}; +; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [out_v8i8_param_1]; +; CHECK-NEXT: ld.param.v2.u32 {%r5, %r6}, [out_v8i8_param_2]; +; CHECK-NEXT: ld.param.v2.u32 {%r9, %r10}, [out_v8i8_param_0]; +; CHECK-NEXT: and.b32 %r11, %r9, %r5; +; CHECK-NEXT: and.b32 %r13, %r10, %r6; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r15, {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: xor.b32 %r16, %r6, %r15; +; CHECK-NEXT: xor.b32 %r17, %r5, %r15; +; CHECK-NEXT: and.b32 %r18, %r1, %r17; +; CHECK-NEXT: and.b32 %r19, %r2, %r16; +; CHECK-NEXT: or.b32 %r20, %r13, %r19; +; CHECK-NEXT: or.b32 %r21, %r11, %r18; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r21, %r20}; ; CHECK-NEXT: ret; %mx = and <8 x i8> %x, %mask %notmask = xor <8 x i8> %mask, @@ -408,90 +294,32 @@ define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwin define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: out_v16i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<65>; -; CHECK-NEXT: .reg .b32 %r<57>; +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<42>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v16i8_param_0+12]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v16i8_param_0+8]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: mov.b32 %r4, {%rs7, %rs8}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v16i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r5, {%rs9, %rs10}; -; CHECK-NEXT: mov.b32 %r6, {%rs11, %rs12}; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [out_v16i8_param_0]; -; CHECK-NEXT: mov.b32 %r7, {%rs13, %rs14}; -; CHECK-NEXT: mov.b32 %r8, {%rs15, %rs16}; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [out_v16i8_param_2]; -; CHECK-NEXT: mov.b32 %r9, {%rs19, %rs20}; -; CHECK-NEXT: and.b32 %r10, %r8, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs17, %rs18}; -; CHECK-NEXT: and.b32 %r12, %r7, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [out_v16i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r13, {%rs23, %rs24}; -; CHECK-NEXT: and.b32 %r14, %r6, %r13; -; CHECK-NEXT: mov.b32 %r15, {%rs21, %rs22}; -; CHECK-NEXT: and.b32 %r16, %r5, %r15; -; CHECK-NEXT: ld.param.v4.u8 {%rs25, %rs26, %rs27, %rs28}, [out_v16i8_param_2+8]; -; CHECK-NEXT: mov.b32 %r17, {%rs27, %rs28}; -; CHECK-NEXT: and.b32 %r18, %r4, %r17; -; CHECK-NEXT: mov.b32 %r19, {%rs25, %rs26}; -; CHECK-NEXT: and.b32 %r20, %r3, %r19; -; CHECK-NEXT: ld.param.v4.u8 {%rs29, %rs30, %rs31, %rs32}, [out_v16i8_param_2+12]; -; CHECK-NEXT: mov.b32 %r21, {%rs31, %rs32}; -; CHECK-NEXT: and.b32 %r22, %r2, %r21; -; CHECK-NEXT: mov.b32 %r23, {%rs29, %rs30}; -; CHECK-NEXT: and.b32 %r24, %r1, %r23; -; CHECK-NEXT: ld.param.v4.u8 {%rs33, %rs34, %rs35, %rs36}, [out_v16i8_param_1+12]; -; CHECK-NEXT: mov.b32 %r25, {%rs33, %rs34}; -; CHECK-NEXT: mov.b32 %r26, {%rs35, %rs36}; -; CHECK-NEXT: 
ld.param.v4.u8 {%rs37, %rs38, %rs39, %rs40}, [out_v16i8_param_1+8]; -; CHECK-NEXT: mov.b32 %r27, {%rs37, %rs38}; -; CHECK-NEXT: mov.b32 %r28, {%rs39, %rs40}; -; CHECK-NEXT: ld.param.v4.u8 {%rs41, %rs42, %rs43, %rs44}, [out_v16i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r29, {%rs41, %rs42}; -; CHECK-NEXT: mov.b32 %r30, {%rs43, %rs44}; -; CHECK-NEXT: ld.param.v4.u8 {%rs45, %rs46, %rs47, %rs48}, [out_v16i8_param_1]; -; CHECK-NEXT: mov.b32 %r31, {%rs45, %rs46}; -; CHECK-NEXT: mov.b32 %r32, {%rs47, %rs48}; -; CHECK-NEXT: xor.b32 %r33, %r23, 16711935; -; CHECK-NEXT: xor.b32 %r34, %r21, 16711935; -; CHECK-NEXT: xor.b32 %r35, %r19, 16711935; -; CHECK-NEXT: xor.b32 %r36, %r17, 16711935; -; CHECK-NEXT: xor.b32 %r37, %r15, 16711935; -; CHECK-NEXT: xor.b32 %r38, %r13, 16711935; -; CHECK-NEXT: xor.b32 %r39, %r11, 16711935; -; CHECK-NEXT: xor.b32 %r40, %r9, 16711935; -; CHECK-NEXT: and.b32 %r41, %r32, %r40; -; CHECK-NEXT: and.b32 %r42, %r31, %r39; -; CHECK-NEXT: and.b32 %r43, %r30, %r38; -; CHECK-NEXT: and.b32 %r44, %r29, %r37; -; CHECK-NEXT: and.b32 %r45, %r28, %r36; -; CHECK-NEXT: and.b32 %r46, %r27, %r35; -; CHECK-NEXT: and.b32 %r47, %r26, %r34; -; CHECK-NEXT: and.b32 %r48, %r25, %r33; -; CHECK-NEXT: or.b32 %r49, %r24, %r48; -; CHECK-NEXT: or.b32 %r50, %r22, %r47; -; CHECK-NEXT: or.b32 %r51, %r20, %r46; -; CHECK-NEXT: or.b32 %r52, %r18, %r45; -; CHECK-NEXT: or.b32 %r53, %r16, %r44; -; CHECK-NEXT: or.b32 %r54, %r14, %r43; -; CHECK-NEXT: or.b32 %r55, %r12, %r42; -; CHECK-NEXT: or.b32 %r56, %r10, %r41; -; CHECK-NEXT: mov.b32 {%rs49, %rs50}, %r56; -; CHECK-NEXT: mov.b32 {%rs51, %rs52}, %r55; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs51, %rs52, %rs49, %rs50}; -; CHECK-NEXT: mov.b32 {%rs53, %rs54}, %r54; -; CHECK-NEXT: mov.b32 {%rs55, %rs56}, %r53; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs55, %rs56, %rs53, %rs54}; -; CHECK-NEXT: mov.b32 {%rs57, %rs58}, %r52; -; CHECK-NEXT: mov.b32 {%rs59, %rs60}, %r51; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs59, %rs60, %rs57, %rs58}; -; CHECK-NEXT: mov.b32 {%rs61, %rs62}, %r50; -; CHECK-NEXT: mov.b32 {%rs63, %rs64}, %r49; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs63, %rs64, %rs61, %rs62}; +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v16i8_param_1]; +; CHECK-NEXT: ld.param.v4.u32 {%r9, %r10, %r11, %r12}, [out_v16i8_param_2]; +; CHECK-NEXT: ld.param.v4.u32 {%r17, %r18, %r19, %r20}, [out_v16i8_param_0]; +; CHECK-NEXT: and.b32 %r21, %r17, %r9; +; CHECK-NEXT: and.b32 %r23, %r18, %r10; +; CHECK-NEXT: and.b32 %r25, %r19, %r11; +; CHECK-NEXT: and.b32 %r27, %r20, %r12; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r29, {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: xor.b32 %r30, %r12, %r29; +; CHECK-NEXT: xor.b32 %r31, %r11, %r29; +; CHECK-NEXT: xor.b32 %r32, %r10, %r29; +; CHECK-NEXT: xor.b32 %r33, %r9, %r29; +; CHECK-NEXT: and.b32 %r34, %r1, %r33; +; CHECK-NEXT: and.b32 %r35, %r2, %r32; +; CHECK-NEXT: and.b32 %r36, %r3, %r31; +; CHECK-NEXT: and.b32 %r37, %r4, %r30; +; CHECK-NEXT: or.b32 %r38, %r27, %r37; +; CHECK-NEXT: or.b32 %r39, %r25, %r36; +; CHECK-NEXT: or.b32 %r40, %r23, %r35; +; CHECK-NEXT: or.b32 %r41, %r21, %r34; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r41, %r40, %r39, %r38}; ; CHECK-NEXT: ret; %mx = and <16 x i8> %x, %mask %notmask = xor <16 x i8> %mask, @@ -684,44 +512,16 @@ define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: in_v4i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 
__local_depot18[4]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-NEXT: .reg .b32 %r<8>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot18; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [in_v4i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [in_v4i8_param_1]; -; CHECK-NEXT: mov.b32 %r3, {%rs7, %rs8}; -; CHECK-NEXT: xor.b32 %r4, %r2, %r3; -; CHECK-NEXT: mov.b32 %r5, {%rs5, %rs6}; -; CHECK-NEXT: xor.b32 %r6, %r1, %r5; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [in_v4i8_param_2]; -; CHECK-NEXT: mov.b32 %r7, {%rs9, %rs10}; -; CHECK-NEXT: and.b32 %r8, %r6, %r7; -; CHECK-NEXT: mov.b32 %r9, {%rs11, %rs12}; -; CHECK-NEXT: and.b32 %r10, %r4, %r9; -; CHECK-NEXT: xor.b32 %r11, %r10, %r3; -; CHECK-NEXT: mov.b32 {%rs13, %rs14}, %r11; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs13, %rs14}; -; CHECK-NEXT: xor.b32 %r12, %r8, %r5; -; CHECK-NEXT: mov.b32 {%rs15, %rs16}, %r12; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs15, %rs16}; -; CHECK-NEXT: ld.u16 %r13, [%SP+0]; -; CHECK-NEXT: shl.b32 %r14, %r13, 16; -; CHECK-NEXT: ld.u16 %r15, [%SP+2]; -; CHECK-NEXT: or.b32 %r16, %r15, %r14; -; CHECK-NEXT: shr.u32 %r17, %r16, 8; -; CHECK-NEXT: cvt.u16.u32 %rs17, %r17; -; CHECK-NEXT: cvt.u16.u32 %rs18, %r13; -; CHECK-NEXT: bfe.s32 %r18, %r13, 8, 8; -; CHECK-NEXT: cvt.u16.u32 %rs19, %r18; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs15, %rs17, %rs18, %rs19}; +; CHECK-NEXT: ld.param.u32 %r1, [in_v4i8_param_0]; +; CHECK-NEXT: ld.param.u32 %r2, [in_v4i8_param_1]; +; CHECK-NEXT: xor.b32 %r3, %r1, %r2; +; CHECK-NEXT: ld.param.u32 %r4, [in_v4i8_param_2]; +; CHECK-NEXT: and.b32 %r5, %r3, %r4; +; CHECK-NEXT: xor.b32 %r6, %r5, %r2; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6; ; CHECK-NEXT: ret; %n0 = xor <4 x i8> %x, %y %n1 = and <4 x i8> %n0, %mask @@ -776,80 +576,19 @@ define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: in_v8i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot21[8]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<40>; -; CHECK-NEXT: .reg .b32 %r<34>; -; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-NEXT: .reg .b32 %r<15>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot21; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [in_v8i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [in_v8i8_param_0]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: mov.b32 %r4, {%rs7, %rs8}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [in_v8i8_param_1]; -; CHECK-NEXT: mov.b32 %r5, {%rs11, %rs12}; -; CHECK-NEXT: xor.b32 %r6, %r4, %r5; -; CHECK-NEXT: mov.b32 %r7, {%rs9, %rs10}; -; CHECK-NEXT: xor.b32 %r8, %r3, %r7; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [in_v8i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r9, {%rs15, %rs16}; -; CHECK-NEXT: xor.b32 %r10, %r2, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs13, %rs14}; -; CHECK-NEXT: xor.b32 %r12, %r1, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [in_v8i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r13, {%rs17, 
%rs18}; -; CHECK-NEXT: and.b32 %r14, %r12, %r13; -; CHECK-NEXT: mov.b32 %r15, {%rs19, %rs20}; -; CHECK-NEXT: and.b32 %r16, %r10, %r15; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [in_v8i8_param_2]; -; CHECK-NEXT: mov.b32 %r17, {%rs21, %rs22}; -; CHECK-NEXT: and.b32 %r18, %r8, %r17; -; CHECK-NEXT: mov.b32 %r19, {%rs23, %rs24}; -; CHECK-NEXT: and.b32 %r20, %r6, %r19; -; CHECK-NEXT: xor.b32 %r21, %r20, %r5; -; CHECK-NEXT: mov.b32 {%rs25, %rs26}, %r21; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs25, %rs26}; -; CHECK-NEXT: xor.b32 %r22, %r18, %r7; -; CHECK-NEXT: mov.b32 {%rs27, %rs28}, %r22; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs27, %rs28}; -; CHECK-NEXT: xor.b32 %r23, %r16, %r9; -; CHECK-NEXT: mov.b32 {%rs29, %rs30}, %r23; -; CHECK-NEXT: st.v2.u8 [%SP+4], {%rs29, %rs30}; -; CHECK-NEXT: xor.b32 %r24, %r14, %r11; -; CHECK-NEXT: mov.b32 {%rs31, %rs32}, %r24; -; CHECK-NEXT: st.v2.u8 [%SP+6], {%rs31, %rs32}; -; CHECK-NEXT: ld.u16 %r25, [%SP+0]; -; CHECK-NEXT: shl.b32 %r26, %r25, 16; -; CHECK-NEXT: ld.u16 %r27, [%SP+2]; -; CHECK-NEXT: or.b32 %r28, %r27, %r26; -; CHECK-NEXT: cvt.u64.u32 %rd1, %r28; -; CHECK-NEXT: ld.u16 %r29, [%SP+4]; -; CHECK-NEXT: shl.b32 %r30, %r29, 16; -; CHECK-NEXT: ld.u16 %r31, [%SP+6]; -; CHECK-NEXT: or.b32 %r32, %r31, %r30; -; CHECK-NEXT: cvt.u64.u32 %rd2, %r32; -; CHECK-NEXT: shl.b64 %rd3, %rd2, 32; -; CHECK-NEXT: or.b64 %rd4, %rd1, %rd3; -; CHECK-NEXT: shr.u32 %r33, %r32, 8; -; CHECK-NEXT: shr.u64 %rd5, %rd4, 24; -; CHECK-NEXT: cvt.u16.u64 %rs33, %rd5; -; CHECK-NEXT: shr.u64 %rd6, %rd1, 16; -; CHECK-NEXT: cvt.u16.u64 %rs34, %rd6; -; CHECK-NEXT: shr.u64 %rd7, %rd1, 8; -; CHECK-NEXT: cvt.u16.u64 %rs35, %rd7; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs27, %rs35, %rs34, %rs33}; -; CHECK-NEXT: cvt.u16.u32 %rs36, %r33; -; CHECK-NEXT: bfe.s64 %rd8, %rd2, 24, 8; -; CHECK-NEXT: cvt.u16.u64 %rs37, %rd8; -; CHECK-NEXT: cvt.u16.u32 %rs38, %r29; -; CHECK-NEXT: cvt.u16.u32 %rs39, %r31; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs39, %rs36, %rs38, %rs37}; +; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [in_v8i8_param_0]; +; CHECK-NEXT: ld.param.v2.u32 {%r3, %r4}, [in_v8i8_param_1]; +; CHECK-NEXT: ld.param.v2.u32 {%r5, %r6}, [in_v8i8_param_2]; +; CHECK-NEXT: xor.b32 %r7, %r2, %r4; +; CHECK-NEXT: and.b32 %r8, %r7, %r6; +; CHECK-NEXT: xor.b32 %r9, %r8, %r4; +; CHECK-NEXT: xor.b32 %r11, %r1, %r3; +; CHECK-NEXT: and.b32 %r12, %r11, %r5; +; CHECK-NEXT: xor.b32 %r13, %r12, %r3; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r13, %r9}; ; CHECK-NEXT: ret; %n0 = xor <8 x i8> %x, %y %n1 = and <8 x i8> %n0, %mask @@ -930,82 +669,25 @@ define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: in_v16i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<65>; -; CHECK-NEXT: .reg .b32 %r<49>; +; CHECK-NEXT: .reg .b32 %r<29>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [in_v16i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [in_v16i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r3, {%rs7, %rs8}; -; CHECK-NEXT: mov.b32 %r4, {%rs5, %rs6}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [in_v16i8_param_0+8]; -; CHECK-NEXT: mov.b32 %r5, {%rs11, %rs12}; -; CHECK-NEXT: mov.b32 %r6, {%rs9, %rs10}; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [in_v16i8_param_0+12]; -; CHECK-NEXT: mov.b32 %r7, {%rs15, %rs16}; -; 
CHECK-NEXT: mov.b32 %r8, {%rs13, %rs14}; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [in_v16i8_param_1+12]; -; CHECK-NEXT: mov.b32 %r9, {%rs17, %rs18}; -; CHECK-NEXT: xor.b32 %r10, %r8, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs19, %rs20}; -; CHECK-NEXT: xor.b32 %r12, %r7, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [in_v16i8_param_1+8]; -; CHECK-NEXT: mov.b32 %r13, {%rs21, %rs22}; -; CHECK-NEXT: xor.b32 %r14, %r6, %r13; -; CHECK-NEXT: mov.b32 %r15, {%rs23, %rs24}; -; CHECK-NEXT: xor.b32 %r16, %r5, %r15; -; CHECK-NEXT: ld.param.v4.u8 {%rs25, %rs26, %rs27, %rs28}, [in_v16i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r17, {%rs25, %rs26}; -; CHECK-NEXT: xor.b32 %r18, %r4, %r17; -; CHECK-NEXT: mov.b32 %r19, {%rs27, %rs28}; -; CHECK-NEXT: xor.b32 %r20, %r3, %r19; -; CHECK-NEXT: ld.param.v4.u8 {%rs29, %rs30, %rs31, %rs32}, [in_v16i8_param_1]; -; CHECK-NEXT: mov.b32 %r21, {%rs29, %rs30}; -; CHECK-NEXT: xor.b32 %r22, %r2, %r21; -; CHECK-NEXT: mov.b32 %r23, {%rs31, %rs32}; -; CHECK-NEXT: xor.b32 %r24, %r1, %r23; -; CHECK-NEXT: ld.param.v4.u8 {%rs33, %rs34, %rs35, %rs36}, [in_v16i8_param_2]; -; CHECK-NEXT: mov.b32 %r25, {%rs35, %rs36}; -; CHECK-NEXT: and.b32 %r26, %r24, %r25; -; CHECK-NEXT: mov.b32 %r27, {%rs33, %rs34}; -; CHECK-NEXT: and.b32 %r28, %r22, %r27; -; CHECK-NEXT: ld.param.v4.u8 {%rs37, %rs38, %rs39, %rs40}, [in_v16i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r29, {%rs39, %rs40}; -; CHECK-NEXT: and.b32 %r30, %r20, %r29; -; CHECK-NEXT: mov.b32 %r31, {%rs37, %rs38}; -; CHECK-NEXT: and.b32 %r32, %r18, %r31; -; CHECK-NEXT: ld.param.v4.u8 {%rs41, %rs42, %rs43, %rs44}, [in_v16i8_param_2+8]; -; CHECK-NEXT: mov.b32 %r33, {%rs43, %rs44}; -; CHECK-NEXT: and.b32 %r34, %r16, %r33; -; CHECK-NEXT: mov.b32 %r35, {%rs41, %rs42}; -; CHECK-NEXT: and.b32 %r36, %r14, %r35; -; CHECK-NEXT: ld.param.v4.u8 {%rs45, %rs46, %rs47, %rs48}, [in_v16i8_param_2+12]; -; CHECK-NEXT: mov.b32 %r37, {%rs47, %rs48}; -; CHECK-NEXT: and.b32 %r38, %r12, %r37; -; CHECK-NEXT: mov.b32 %r39, {%rs45, %rs46}; -; CHECK-NEXT: and.b32 %r40, %r10, %r39; -; CHECK-NEXT: xor.b32 %r41, %r40, %r9; -; CHECK-NEXT: xor.b32 %r42, %r38, %r11; -; CHECK-NEXT: xor.b32 %r43, %r36, %r13; -; CHECK-NEXT: xor.b32 %r44, %r34, %r15; -; CHECK-NEXT: xor.b32 %r45, %r32, %r17; -; CHECK-NEXT: xor.b32 %r46, %r30, %r19; -; CHECK-NEXT: xor.b32 %r47, %r28, %r21; -; CHECK-NEXT: xor.b32 %r48, %r26, %r23; -; CHECK-NEXT: mov.b32 {%rs49, %rs50}, %r48; -; CHECK-NEXT: mov.b32 {%rs51, %rs52}, %r47; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs51, %rs52, %rs49, %rs50}; -; CHECK-NEXT: mov.b32 {%rs53, %rs54}, %r46; -; CHECK-NEXT: mov.b32 {%rs55, %rs56}, %r45; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs55, %rs56, %rs53, %rs54}; -; CHECK-NEXT: mov.b32 {%rs57, %rs58}, %r44; -; CHECK-NEXT: mov.b32 {%rs59, %rs60}, %r43; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs59, %rs60, %rs57, %rs58}; -; CHECK-NEXT: mov.b32 {%rs61, %rs62}, %r42; -; CHECK-NEXT: mov.b32 {%rs63, %rs64}, %r41; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs63, %rs64, %rs61, %rs62}; +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v16i8_param_0]; +; CHECK-NEXT: ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v16i8_param_1]; +; CHECK-NEXT: xor.b32 %r9, %r4, %r8; +; CHECK-NEXT: xor.b32 %r10, %r3, %r7; +; CHECK-NEXT: xor.b32 %r11, %r2, %r6; +; CHECK-NEXT: xor.b32 %r12, %r1, %r5; +; CHECK-NEXT: ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v16i8_param_2]; +; CHECK-NEXT: and.b32 %r17, %r12, %r13; +; CHECK-NEXT: and.b32 %r18, %r11, %r14; +; CHECK-NEXT: and.b32 %r19, 
%r10, %r15; +; CHECK-NEXT: and.b32 %r20, %r9, %r16; +; CHECK-NEXT: xor.b32 %r21, %r20, %r8; +; CHECK-NEXT: xor.b32 %r23, %r19, %r7; +; CHECK-NEXT: xor.b32 %r25, %r18, %r6; +; CHECK-NEXT: xor.b32 %r27, %r17, %r5; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r27, %r25, %r23, %r21}; ; CHECK-NEXT: ret; %n0 = xor <16 x i8> %x, %y %n1 = and <16 x i8> %n0, %mask diff --git a/llvm/test/CodeGen/NVPTX/vec8.ll b/llvm/test/CodeGen/NVPTX/vec8.ll index 092607462f3329b..8333a9b935d6af8 100644 --- a/llvm/test/CodeGen/NVPTX/vec8.ll +++ b/llvm/test/CodeGen/NVPTX/vec8.ll @@ -5,10 +5,9 @@ target triple = "nvptx-unknown-cuda" ; CHECK: .visible .func foo define void @foo(<8 x i8> %a, ptr %b) { -; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [foo_param_0] -; CHECK-DAG: ld.param.v4.u8 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [foo_param_0+4] +; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo_param_0] ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9+]]], [foo_param_1] -; CHECK: add.s16 [[T:%rs[0-9+]]], [[E1]], [[E6]]; +; CHECK: add.s16 [[T:%rs[0-9+]]], ; CHECK: st.u8 [%[[B]]], [[T]]; %t0 = extractelement <8 x i8> %a, i32 1 %t1 = extractelement <8 x i8> %a, i32 6 >From bda4bd36ded20dba4ac89824a42b8a2017c41247 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Mon, 2 Oct 2023 18:05:42 -0700 Subject: [PATCH 2/8] More work on fleshing out extractelt/build_vector for v4i8 Verified that NVPTX tests pass, with ptxas being able to compile the PTX produced by the llc tests. --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 18 ++++ llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 + llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 71 +++++++------- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 64 +++++++++---- llvm/test/CodeGen/NVPTX/extractelement.ll | 55 ++++++++++- llvm/test/CodeGen/NVPTX/i16x2-instructions.ll | 2 +- ...unfold-masked-merge-vector-variablemask.ll | 95 ++++++++----------- 7 files changed, 196 insertions(+), 110 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 1daa4971981c25c..c3bcf8f05a278ad 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsNVPTX.h" @@ -3569,6 +3570,23 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); } +bool NVPTXDAGToDAGISel::SelectExtractEltFromV4I8(SDValue N, SDValue &V, + SDValue &BitOffset) { + SDValue Vector = N->getOperand(0); + if (!(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Vector->getValueType(0) == MVT::v4i8)) + return false; + + if (const ConstantSDNode *IdxConst = + dyn_cast(N->getOperand(1))) { + V = Vector; + BitOffset = CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, + SDLoc(N), MVT::i32); + return true; + } + return false; +} + bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { const Value *Src = nullptr; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 06922331f5e2059..34b5dd449ce086f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -95,6 +95,7 @@ class
LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { SDValue &Offset); bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectExtractEltFromV4I8(SDValue N, SDValue &Value, SDValue &Idx); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7880d70fb2c6fea..66dcdb53b136b96 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -497,6 +497,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand); + // TODO: we should eventually lower it as a PRMT instruction. + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom); + // Operations not directly supported by NVPTX. for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64}) { setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BR_CC, VT, Expand); } @@ -2156,45 +2160,47 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return DAG.getBuildVector(Node->getValueType(0), dl, Ops); } -// We can init constant f16x2 with a single .b32 move. Normally it +// We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it // would get lowered as two constant loads and vector-packing move. -// mov.b16 %h1, 0x4000; -// mov.b16 %h2, 0x3C00; -// mov.b32 %hh2, {%h2, %h1}; // Instead we want just a constant move: // mov.b32 %hh2, 0x40003C00 -// -// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0 -// generates good SASS in both cases. SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); - if (!(Isv2x16VT(VT))) + if (!(Isv2x16VT(VT) || VT == MVT::v4i8)) + return Op; + + if (!llvm::all_of(Op->ops(), [](SDValue Operand) { + return Operand->isUndef() || isa(Operand) || + isa(Operand); + })) return Op; - APInt E0; - APInt E1; - if (VT == MVT::v2f16 || VT == MVT::v2bf16) { - if (!(isa(Op->getOperand(0)) && - isa(Op->getOperand(1)))) - return Op; - - E0 = cast(Op->getOperand(0)) - ->getValueAPF() - .bitcastToAPInt(); - E1 = cast(Op->getOperand(1)) - ->getValueAPF() - .bitcastToAPInt(); - } else { - assert(VT == MVT::v2i16); - if (!(isa(Op->getOperand(0)) && - isa(Op->getOperand(1)))) - return Op; - E0 = cast(Op->getOperand(0))->getAPIntValue(); - E1 = cast(Op->getOperand(1))->getAPIntValue(); + // Get the value of the Nth operand as an APInt(32). Undef values treated as 0. 
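As a quick aside before the lambda itself: the packing this implements is plain little-endian byte placement, lane 0 in the least significant byte. A minimal standalone C++ sketch of that arithmetic (illustrative only; packV4I8 and the main driver are made-up names, and the real code operates on APInt operands rather than plain integers):

#include <cstdint>
#include <cstdio>

// Pack four i8 lanes into one .b32 immediate, lane 0 in the least
// significant byte, mirroring the GetOperand/shl folding below.
static uint32_t packV4I8(uint8_t e0, uint8_t e1, uint8_t e2, uint8_t e3) {
  return uint32_t(e0) | uint32_t(e1) << 8 | uint32_t(e2) << 16 |
         uint32_t(e3) << 24;
}

int main() {
  // <4 x i8> <i8 1, i8 2, i8 3, i8 4> folds to 0x04030201 (67305985),
  // which matches the single mov.u32 seen later in test_ret_const.
  printf("0x%08x\n", (unsigned)packV4I8(1, 2, 3, 4));
  return 0;
}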
+ auto GetOperand = [](SDValue Op, int N) -> APInt { + const SDValue &Operand = Op->getOperand(N); + EVT VT = Op->getValueType(0); + if (Operand->isUndef()) + return APInt(32, 0); + APInt Value; + if (VT == MVT::v2f16 || VT == MVT::v2bf16) + Value = cast(Operand)->getValueAPF().bitcastToAPInt(); + else if (VT == MVT::v2i16 || VT == MVT::v4i8) + Value = cast(Operand)->getAPIntValue(); + else + llvm_unreachable("Unsupported type"); + return Value.zext(32); + }; + APInt Value; + if (Isv2x16VT(VT)) { + Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16); + } else if (VT == MVT::v4i8) { + Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) | + GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24); + } else { + llvm_unreachable("Unsupported type"); } - SDValue Const = - DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32); + SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32); return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const); } @@ -5262,11 +5268,12 @@ static SDValue PerformEXTRACTCombine(SDNode *N, SDValue Vector = N->getOperand(0); EVT VectorVT = Vector.getValueType(); if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && - IsPTXVectorType(VectorVT.getSimpleVT()) && VectorVT != MVT::v4i8) + IsPTXVectorType(VectorVT.getSimpleVT())) return SDValue(); // Native vector loads already combine nicely w/ // extract_vector_elt, except for v4i8. // Don't mess with singletons or v2*16 types, we already handle them OK. - if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT)) + if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) || + VectorVT == MVT::v4i8) return SDValue(); uint64_t VectorBits = VectorVT.getSizeInBits(); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 047161fb2027dee..307963aaa800b88 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1738,7 +1738,7 @@ def FUNSHFRCLAMP : // restriction in PTX? // // dest and src may be int32 or int64, but start and end are always int32. 
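For readers unfamiliar with these instructions, here is a rough C++ model of what bfe.u32/bfe.s32 compute for in-range operands (a sketch assuming 0 < len < 32; the clamping the hardware applies to pos/len is deliberately ignored):

#include <cstdint>

// bfe.u32: extract `len` bits of `a` starting at bit `pos`, zero-extended.
static uint32_t bfe_u32(uint32_t a, uint32_t pos, uint32_t len) {
  return (a >> pos) & ((1u << len) - 1);
}

// bfe.s32: same field, but sign-extended from the field's top bit.
static int32_t bfe_s32(uint32_t a, uint32_t pos, uint32_t len) {
  uint32_t field = (a >> pos) & ((1u << len) - 1);
  uint32_t sign = 1u << (len - 1);
  return (int32_t)((field ^ sign) - sign);
}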
-multiclass BFX { +multiclass BFE { def rrr : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c), @@ -1752,17 +1752,29 @@ multiclass BFX { (ins RC:$a, i32imm:$b, i32imm:$c), !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; } +multiclass BFI { + def rrr + : NVPTXInst<(outs RC:$f), + (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; + def rri + : NVPTXInst<(outs RC:$f), + (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; + def rii + : NVPTXInst<(outs RC:$f), + (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; +} let hasSideEffects = false in { - defm BFE_S32 : BFX<"bfe.s32", Int32Regs>; - defm BFE_U32 : BFX<"bfe.u32", Int32Regs>; - defm BFE_S64 : BFX<"bfe.s64", Int64Regs>; - defm BFE_U64 : BFX<"bfe.u64", Int64Regs>; - - defm BFI_S32 : BFX<"bfi.s32", Int32Regs>; - defm BFI_U32 : BFX<"bfi.u32", Int32Regs>; - defm BFI_S64 : BFX<"bfi.s64", Int64Regs>; - defm BFI_U64 : BFX<"bfi.u64", Int64Regs>; + defm BFE_S32 : BFE<"bfe.s32", Int32Regs>; + defm BFE_U32 : BFE<"bfe.u32", Int32Regs>; + defm BFE_S64 : BFE<"bfe.s64", Int64Regs>; + defm BFE_U64 : BFE<"bfe.u64", Int64Regs>; + + defm BFI_B32 : BFI<"bfi.b32", Int32Regs>; + defm BFI_B64 : BFI<"bfi.b64", Int64Regs>; } // Common byte extraction patterns @@ -1782,6 +1794,24 @@ def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)), (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>; +def ExtractFromV4I8 : ComplexPattern; +def: Pat<(i32 (sext_inreg (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), i8)), + (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8)>; +def: Pat<(i32 (and (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), 255)), + (BFE_U32rii Int32Regs:$src, imm:$bitidx, 8)>; +def: Pat<(i16 (sext_inreg (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), i8)), + (CVT_s8_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; +def: Pat<(ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), + (CVT_s16_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; + + +def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), + (i16 Int16Regs:$c), (i16 Int16Regs:$d))), + (BFI_B32rii + (BFI_B32rii (CVT_u32_u16 Int16Regs:$d, CvtNONE), (CVT_u32_u16 Int16Regs:$c, CvtNONE), 8, 8), + (BFI_B32rii (CVT_u32_u16 Int16Regs:$b, CvtNONE), (CVT_u32_u16 Int16Regs:$a, CvtNONE), 8, 8), + 16, 16)>; + //----------------------------------- // Comparison instructions (setp, set) //----------------------------------- @@ -3293,10 +3323,6 @@ let hasSideEffects = false in { (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4), "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; - def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2, - Int16Regs:$s3, Int16Regs:$s4), - "mov.b32 \t$d, {{$s1, $s2, $s3, $s4}};", []>; def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2), "mov.b32 \t$d, {{$s1, $s2}};", []>; @@ -3312,10 +3338,6 @@ let hasSideEffects = false in { Int16Regs:$d3, Int16Regs:$d4), (ins Int64Regs:$s), "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; - def I32toV4I8 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, - Int16Regs:$d3, Int16Regs:$d4), - (ins Int32Regs:$s), - "mov.b32 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), (ins Int32Regs:$s), "mov.b32 \t{{$d1, 
$d2}}, $s;", []>; @@ -3351,6 +3373,9 @@ def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))), def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))), (I64toI32H Int64Regs:$s)>; +def: Pat<(i32 (sext (extractelt (v2i16 Int32Regs:$src), 0))), + (CVT_INREG_s32_s16 Int32Regs:$src)>; + foreach vt = [v2f16, v2bf16, v2i16] in { def : Pat<(extractelt (vt Int32Regs:$src), 0), (I32toI16L Int32Regs:$src)>; @@ -3363,9 +3388,6 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; -def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), - (i16 Int16Regs:$c), (i16 Int16Regs:$d))), - (V4I8toI32 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c, Int16Regs:$d)>; // Count leading zeros let hasSideEffects = false in { diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll index da07f973501c855..ed7dd45ab7b4502 100644 --- a/llvm/test/CodeGen/NVPTX/extractelement.ll +++ b/llvm/test/CodeGen/NVPTX/extractelement.ll @@ -18,7 +18,8 @@ define i16 @test_v2i8(i16 %a) { ; CHECK-LABEL: test_v4i8 ; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_param_0]; -; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9+]]], [[R]]; +; CHECK-DAG: bfe.s32 [[R0:%r[0-9+]]], [[R]], 0, 8; +; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9+]]], [[R0]]; ; CHECK-DAG: bfe.s32 [[R1:%r[0-9+]]], [[R]], 8, 8; ; CHECK-DAG: cvt.s8.s32 [[E1:%rs[0-9+]]], [[R1]]; ; CHECK-DAG: bfe.s32 [[R2:%r[0-9+]]], [[R]], 16, 8; @@ -41,6 +42,58 @@ define i16 @test_v4i8(i32 %a) { ret i16 %r } +; CHECK-LABEL: test_v4i8_s32 +; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_s32_param_0]; +; CHECK-DAG: bfe.s32 [[R0:%r[0-9+]]], [[R]], 0, 8; +; CHECK-DAG: bfe.s32 [[R1:%r[0-9+]]], [[R]], 8, 8; +; CHECK-DAG: bfe.s32 [[R2:%r[0-9+]]], [[R]], 16, 8; +; CHECK-DAG: bfe.s32 [[R3:%r[0-9+]]], [[R]], 24, 8; +; CHECK-DAG: add.s32 [[R01:%r[0-9+]]], [[R0]], [[R1]] +; CHECK-DAG: add.s32 [[R23:%r[0-9+]]], [[R2]], [[R3]] +; CHECK-DAG: add.s32 [[R0123:%r[0-9+]]], [[R01]], [[R23]] +define i32 @test_v4i8_s32(i32 %a) { + %v = bitcast i32 %a to <4 x i8> + %r0 = extractelement <4 x i8> %v, i64 0 + %r1 = extractelement <4 x i8> %v, i64 1 + %r2 = extractelement <4 x i8> %v, i64 2 + %r3 = extractelement <4 x i8> %v, i64 3 + %r0i = sext i8 %r0 to i32 + %r1i = sext i8 %r1 to i32 + %r2i = sext i8 %r2 to i32 + %r3i = sext i8 %r3 to i32 + %r01 = add i32 %r0i, %r1i + %r23 = add i32 %r2i, %r3i + %r = add i32 %r01, %r23 + ret i32 %r +} + +; CHECK-LABEL: test_v4i8_u32 +; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_u32_param_0]; +; CHECK-DAG: bfe.u32 [[R0:%r[0-9+]]], [[R]], 0, 8; +; CHECK-DAG: bfe.u32 [[R1:%r[0-9+]]], [[R]], 8, 8; +; CHECK-DAG: bfe.u32 [[R2:%r[0-9+]]], [[R]], 16, 8; +; CHECK-DAG: bfe.u32 [[R3:%r[0-9+]]], [[R]], 24, 8; +; CHECK-DAG: add.s32 [[R01:%r[0-9+]]], [[R0]], [[R1]] +; CHECK-DAG: add.s32 [[R23:%r[0-9+]]], [[R2]], [[R3]] +; CHECK-DAG: add.s32 [[R0123:%r[0-9+]]], [[R01]], [[R23]] +define i32 @test_v4i8_u32(i32 %a) { + %v = bitcast i32 %a to <4 x i8> + %r0 = extractelement <4 x i8> %v, i64 0 + %r1 = extractelement <4 x i8> %v, i64 1 + %r2 = extractelement <4 x i8> %v, i64 2 + %r3 = extractelement <4 x i8> %v, i64 3 + %r0i = zext i8 %r0 to i32 + %r1i = zext i8 %r1 to i32 + %r2i = zext i8 %r2 to i32 + %r3i = zext i8 %r3 to i32 + %r01 = add i32 %r0i, %r1i + %r23 = add i32 %r2i, %r3i + %r = add i32 %r01, %r23 + ret i32 %r +} + + + ; CHECK-LABEL: test_v8i8 ; CHECK: ld.param.u64 [[R:%rd[0-9+]]], 
[test_v8i8_param_0]; ; CHECK-DAG: cvt.s8.s64 [[E0:%rs[0-9+]]], [[R]]; diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index 5a22bbcf7416c17..684e4bc38d83de1 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -5,7 +5,7 @@ ; RUN: %if ptxas %{ \ ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \ ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ -; RUN: | %ptxas-verify -arch=sm_53 \ +; RUN: | %ptxas-verify -arch=sm_90 \ ; RUN: %} ; ## No support for i16x2 instructions ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \ diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll index 74087be4834d966..97b1e38a3388413 100644 --- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll @@ -60,20 +60,17 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<11>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_param_2]; ; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_param_1]; ; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_param_0]; ; CHECK-NEXT: and.b32 %r5, %r4, %r1; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: xor.b32 %r8, %r1, %r7; -; CHECK-NEXT: and.b32 %r9, %r3, %r8; -; CHECK-NEXT: or.b32 %r10, %r5, %r9; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; +; CHECK-NEXT: xor.b32 %r7, %r1, -1; +; CHECK-NEXT: and.b32 %r8, %r3, %r7; +; CHECK-NEXT: or.b32 %r9, %r5, %r8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -85,20 +82,17 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8_undef( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; -; CHECK-NEXT: .reg .b32 %r<11>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_undef_param_2]; ; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_undef_param_1]; ; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_undef_param_0]; ; CHECK-NEXT: and.b32 %r5, %r4, %r1; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs2, %rs1}; -; CHECK-NEXT: xor.b32 %r8, %r1, %r7; -; CHECK-NEXT: and.b32 %r9, %r3, %r8; -; CHECK-NEXT: or.b32 %r10, %r5, %r9; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; +; CHECK-NEXT: xor.b32 %r7, %r1, -1; +; CHECK-NEXT: and.b32 %r8, %r3, %r7; +; CHECK-NEXT: or.b32 %r9, %r5, %r8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -158,8 +152,7 @@ define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwin define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: out_v8i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<22>; +; CHECK-NEXT: .reg .b32 %r<21>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.u32 
{%r1, %r2}, [out_v8i8_param_1]; @@ -167,15 +160,13 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-NEXT: ld.param.v2.u32 {%r9, %r10}, [out_v8i8_param_0]; ; CHECK-NEXT: and.b32 %r11, %r9, %r5; ; CHECK-NEXT: and.b32 %r13, %r10, %r6; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r15, {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: xor.b32 %r16, %r6, %r15; -; CHECK-NEXT: xor.b32 %r17, %r5, %r15; -; CHECK-NEXT: and.b32 %r18, %r1, %r17; -; CHECK-NEXT: and.b32 %r19, %r2, %r16; -; CHECK-NEXT: or.b32 %r20, %r13, %r19; -; CHECK-NEXT: or.b32 %r21, %r11, %r18; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r21, %r20}; +; CHECK-NEXT: xor.b32 %r15, %r6, -1; +; CHECK-NEXT: xor.b32 %r16, %r5, -1; +; CHECK-NEXT: and.b32 %r17, %r1, %r16; +; CHECK-NEXT: and.b32 %r18, %r2, %r15; +; CHECK-NEXT: or.b32 %r19, %r13, %r18; +; CHECK-NEXT: or.b32 %r20, %r11, %r17; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19}; ; CHECK-NEXT: ret; %mx = and <8 x i8> %x, %mask %notmask = xor <8 x i8> %mask, @@ -213,8 +204,7 @@ define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwin define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { ; CHECK-LABEL: out_v4i16_undef( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; -; CHECK-NEXT: .reg .b32 %r<22>; +; CHECK-NEXT: .reg .b32 %r<21>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [out_v4i16_undef_param_1]; @@ -222,15 +212,13 @@ define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) n ; CHECK-NEXT: ld.param.v2.u32 {%r9, %r10}, [out_v4i16_undef_param_0]; ; CHECK-NEXT: and.b32 %r11, %r9, %r5; ; CHECK-NEXT: and.b32 %r13, %r10, %r6; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r15, {%rs2, %rs1}; -; CHECK-NEXT: xor.b32 %r16, %r6, %r15; -; CHECK-NEXT: xor.b32 %r17, %r5, -1; -; CHECK-NEXT: and.b32 %r18, %r1, %r17; -; CHECK-NEXT: and.b32 %r19, %r2, %r16; -; CHECK-NEXT: or.b32 %r20, %r13, %r19; -; CHECK-NEXT: or.b32 %r21, %r11, %r18; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r21, %r20}; +; CHECK-NEXT: xor.b32 %r15, %r6, -65536; +; CHECK-NEXT: xor.b32 %r16, %r5, -1; +; CHECK-NEXT: and.b32 %r17, %r1, %r16; +; CHECK-NEXT: and.b32 %r18, %r2, %r15; +; CHECK-NEXT: or.b32 %r19, %r13, %r18; +; CHECK-NEXT: or.b32 %r20, %r11, %r17; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19}; ; CHECK-NEXT: ret; %mx = and <4 x i16> %x, %mask %notmask = xor <4 x i16> %mask, @@ -294,8 +282,7 @@ define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwin define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: out_v16i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<42>; +; CHECK-NEXT: .reg .b32 %r<41>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v16i8_param_1]; @@ -305,21 +292,19 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin ; CHECK-NEXT: and.b32 %r23, %r18, %r10; ; CHECK-NEXT: and.b32 %r25, %r19, %r11; ; CHECK-NEXT: and.b32 %r27, %r20, %r12; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r29, {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: xor.b32 %r30, %r12, %r29; -; CHECK-NEXT: xor.b32 %r31, %r11, %r29; -; CHECK-NEXT: xor.b32 %r32, %r10, %r29; -; CHECK-NEXT: xor.b32 %r33, %r9, %r29; -; CHECK-NEXT: and.b32 %r34, %r1, %r33; -; CHECK-NEXT: and.b32 %r35, %r2, %r32; -; CHECK-NEXT: and.b32 %r36, %r3, %r31; -; CHECK-NEXT: and.b32 %r37, %r4, 
%r30; -; CHECK-NEXT: or.b32 %r38, %r27, %r37; -; CHECK-NEXT: or.b32 %r39, %r25, %r36; -; CHECK-NEXT: or.b32 %r40, %r23, %r35; -; CHECK-NEXT: or.b32 %r41, %r21, %r34; -; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r41, %r40, %r39, %r38}; +; CHECK-NEXT: xor.b32 %r29, %r12, -1; +; CHECK-NEXT: xor.b32 %r30, %r11, -1; +; CHECK-NEXT: xor.b32 %r31, %r10, -1; +; CHECK-NEXT: xor.b32 %r32, %r9, -1; +; CHECK-NEXT: and.b32 %r33, %r1, %r32; +; CHECK-NEXT: and.b32 %r34, %r2, %r31; +; CHECK-NEXT: and.b32 %r35, %r3, %r30; +; CHECK-NEXT: and.b32 %r36, %r4, %r29; +; CHECK-NEXT: or.b32 %r37, %r27, %r36; +; CHECK-NEXT: or.b32 %r38, %r25, %r35; +; CHECK-NEXT: or.b32 %r39, %r23, %r34; +; CHECK-NEXT: or.b32 %r40, %r21, %r33; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r40, %r39, %r38, %r37}; ; CHECK-NEXT: ret; %mx = and <16 x i8> %x, %mask %notmask = xor <16 x i8> %mask, >From e55bb97942124e2659f8132784131c74e4f6fd10 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Tue, 3 Oct 2023 17:41:20 -0700 Subject: [PATCH 3/8] Down the rabbit hole we go. To make things work consistently for v4i8, we need to implement other vector ops. --- .../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 31 + .../NVPTX/MCTargetDesc/NVPTXInstPrinter.h | 2 + llvm/lib/Target/NVPTX/NVPTX.h | 12 + llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 11 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 168 ++- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 5 + llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 168 ++- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 1237 +++++++++++++++++ 8 files changed, 1580 insertions(+), 54 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index 5d27accdc198c1e..b7a20c351f5ff6f 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -309,3 +309,34 @@ void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum, const MCSymbol &Sym = cast(Expr)->getSymbol(); O << Sym.getName(); } + +void NVPTXInstPrinter::printPrmtMode(const MCInst *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + const MCOperand &MO = MI->getOperand(OpNum); + int64_t Imm = MO.getImm(); + + switch (Imm) { + default: + return; + case NVPTX::PTXPrmtMode::NONE: + break; + case NVPTX::PTXPrmtMode::F4E: + O << ".f4e"; + break; + case NVPTX::PTXPrmtMode::B4E: + O << ".b4e"; + break; + case NVPTX::PTXPrmtMode::RC8: + O << ".rc8"; + break; + case NVPTX::PTXPrmtMode::ECL: + O << ".ecl"; + break; + case NVPTX::PTXPrmtMode::ECR: + O << ".ecr"; + break; + case NVPTX::PTXPrmtMode::RC16: + O << ".rc16"; + break; + } +} diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index 49ad3f269229d5f..e6954f861cd10e2 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -47,6 +47,8 @@ class NVPTXInstPrinter : public MCInstPrinter { raw_ostream &O, const char *Modifier = nullptr); void printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier = nullptr); + void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O, + const char *Modifier = nullptr); }; } diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index c5816b9266dfd9e..f7c8da372cec88c 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -180,6 +180,18 
@@ enum CmpMode { FTZ_FLAG = 0x100 }; } + +namespace PTXPrmtMode { +enum PrmtMode { + NONE, + F4E, + B4E, + RC8, + ECL, + ECR, + RC16, +}; +} } void initializeNVPTXDAGToDAGISelPass(PassRegistry &); } // namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c3bcf8f05a278ad..f442188610715ee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -3577,11 +3577,12 @@ bool NVPTXDAGToDAGISel::SelectExtractEltFromV4I8(SDValue N, SDValue &V, Vector->getValueType(0) == MVT::v4i8)) return false; - if (const ConstantSDNode *IdxConst = - dyn_cast(N->getOperand(1))) { - V = Vector; - BitOffset = CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, - SDLoc(N), MVT::i32); + SDLoc DL(N); + V = Vector; + SDValue Index = N->getOperand(1); + if (const ConstantSDNode *IdxConst = dyn_cast(Index)) { + BitOffset = + CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, DL, MVT::i32); return true; } return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 66dcdb53b136b96..b886b6e2ce5ddde 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -497,18 +497,31 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand); - // TODO: we should eventually lower it as PRMT instruction. - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); + // Only logical ops can be done on v4i8 directly, others must be done + // elementwise. + setOperationAction( + {ISD::ADD, ISD::MUL, ISD::ABS, ISD::SMIN, + ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::CTPOP, + ISD::CTLZ, ISD::ADD, ISD::SUB, ISD::MUL, + ISD::SHL, ISD::SREM, ISD::UREM, ISD::SDIV, + ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS, + ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, + ISD::UINT_TO_FP}, + MVT::v4i8, Expand); // Operations not directly supported by NVPTX. - for (MVT VT : - {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, MVT::f64, - MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64}) { + for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, + MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8, + MVT::i32, MVT::i64}) { setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BR_CC, VT, Expand); } + // Some SIGN_EXTEND_INREG can be done using cvt instruction. // For others we will expand to a SHL/SRA pair. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); @@ -682,7 +695,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // We have some custom DAG combine patterns for these nodes setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL, - ISD::SREM, ISD::UREM, ISD::EXTRACT_VECTOR_ELT}); + ISD::SREM, ISD::UREM, ISD::EXTRACT_VECTOR_ELT, + ISD::VSELECT}); // setcc for f16x2 and bf16x2 needs special handling to prevent // legalizer's attempt to scalarize it due to v2i1 not being legal. 
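The PTXPrmtMode values added earlier in this patch select variants of the PTX prmt.b32 byte-permute instruction. As orientation, a C++ sketch of the generic (unsuffixed) mode as the PTX ISA describes it: the eight-byte source pool is {b, a} with a supplying bytes 0-3, each selector nibble of c picks one byte, and the nibble's high bit replicates that byte's sign bit instead (a model for illustration only, not the committed code):

#include <cstdint>

// Model of prmt.b32 in its generic mode (PrmtNONE above).
static uint32_t prmt(uint32_t a, uint32_t b, uint32_t c) {
  uint64_t pool = (uint64_t)b << 32 | a; // bytes 0-3 from a, 4-7 from b
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t sel = (c >> (4 * i)) & 0xF;
    uint8_t byte = (pool >> (8 * (sel & 7))) & 0xFF;
    if (sel & 8) // replicate the selected byte's MSB across all 8 bits
      byte = (byte & 0x80) ? 0xFF : 0x00;
    result |= (uint32_t)byte << (8 * i);
  }
  return result;
}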
@@ -891,6 +905,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::FUN_SHFR_CLAMP"; case NVPTXISD::IMAD: return "NVPTXISD::IMAD"; + case NVPTXISD::BFE: + return "NVPTXISD::BFE"; + case NVPTXISD::BFI: + return "NVPTXISD::BFI"; + case NVPTXISD::PRMT: + return "NVPTXISD::PRMT"; case NVPTXISD::SETP_F16X2: return "NVPTXISD::SETP_F16X2"; case NVPTXISD::Dummy: @@ -2163,18 +2183,39 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it // would get lowered as two constant loads and vector-packing move. // Instead we want just a constant move: -// mov.b32 %hh2, 0x40003C00 +// mov.b32 %r2, 0x40003C00 SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); if (!(Isv2x16VT(VT) || VT == MVT::v4i8)) return Op; + SDLoc DL(Op); + if (!llvm::all_of(Op->ops(), [](SDValue Operand) { return Operand->isUndef() || isa(Operand) || isa(Operand); - })) + })) { + // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us + // to optimize calculation of constant parts. + if (VT == MVT::v4i8) { + SDValue C8 = DAG.getConstant(8, DL, MVT::i32); + SDValue E01 = DAG.getNode( + NVPTXISD::BFI, DL, MVT::i32, + DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32), + DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); + SDValue E012 = + DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, + DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), E01, + DAG.getConstant(16, DL, MVT::i32), C8); + SDValue E0123 = + DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, + DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), E012, + DAG.getConstant(24, DL, MVT::i32), C8); + return DAG.getNode(ISD::BITCAST, DL, VT, E0123); + } return Op; + } // Get value or the Nth operand as an APInt(32). Undef values treated as 0. auto GetOperand = [](SDValue Op, int N) -> APInt { @@ -2207,13 +2248,26 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue Index = Op->getOperand(1); + SDValue Vector = Op->getOperand(0); + SDLoc DL(Op); + EVT VectorVT = Vector.getValueType(); + + if (VectorVT == MVT::v4i8) { + SDValue BFE = + DAG.getNode(NVPTXISD::BFE, DL, MVT::i32, + {Vector, + DAG.getNode(ISD::MUL, DL, MVT::i32, + DAG.getZExtOrTrunc(Index, DL, MVT::i32), + DAG.getConstant(8, DL, MVT::i32)), + DAG.getConstant(8, DL, MVT::i32)}); + return DAG.getZExtOrTrunc(BFE, DL, Op->getValueType(0)); + } + // Constant index will be matched by tablegen. if (isa(Index.getNode())) return Op; // Extract individual elements and select one of them. 
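Before that select-based v2x16 fallback, a quick scalar model of the v4i8 path above, which needs no select chain at all: the variable index is scaled to a bit offset and a single byte is pulled out with BFE (a sketch; extractV4I8 is an illustrative name):

#include <cstdint>

// Variable-index extract from a v4i8 held in one 32-bit register.
static uint8_t extractV4I8(uint32_t vec, uint32_t idx) {
  uint32_t bitOffset = idx * 8;                // DAG: mul(idx, 8)
  return (uint8_t)((vec >> bitOffset) & 0xFF); // DAG: BFE(vec, offset, 8)
}

This is exactly the shl.b32/bfe.u32 pair checked by test_extract_i in the new test file below.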
- SDValue Vector = Op->getOperand(0); - EVT VectorVT = Vector.getValueType(); assert(Isv2x16VT(VectorVT) && "Unexpected vector type."); EVT EltVT = VectorVT.getVectorElementType(); @@ -2226,6 +2280,34 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, ISD::CondCode::SETEQ); } +SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Vector = Op->getOperand(0); + EVT VectorVT = Vector.getValueType(); + + if (VectorVT != MVT::v4i8) + return Op; + SDLoc DL(Op); + SDValue Value = Op->getOperand(1); + if (Value->isUndef()) + return Vector; + + SDValue Index = Op->getOperand(2); + + SDValue BFI = + DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, + {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector, + DAG.getNode(ISD::MUL, DL, MVT::i32, + DAG.getZExtOrTrunc(Index, DL, MVT::i32), + DAG.getConstant(8, DL, MVT::i32)), + DAG.getConstant(8, DL, MVT::i32)}); + return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI); +} + +SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + return SDValue(); +} /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift /// amount, or @@ -2476,6 +2558,10 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return Op; case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::STORE: @@ -4987,6 +5073,32 @@ static SDValue PerformANDCombine(SDNode *N, } SDValue AExt; + + // Convert BFE-> truncate i16 -> and 255 + // To just BFE-> truncate i16, as the value already has all the bits in the + // right places. + if (Val.getOpcode() == ISD::TRUNCATE) { + SDValue BFE = Val.getOperand(0); + if (BFE.getOpcode() != NVPTXISD::BFE) + return SDValue(); + + ConstantSDNode *BFEBits = dyn_cast(BFE.getOperand(0)); + if (!BFEBits) + return SDValue(); + uint64_t BFEBitsVal = BFEBits->getZExtValue(); + + ConstantSDNode *MaskCnst = dyn_cast(Mask); + if (!MaskCnst) { + // Not an AND with a constant + return SDValue(); + } + uint64_t MaskVal = MaskCnst->getZExtValue(); + + if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1) + return SDValue(); + // If we get here, the AND is unnecessary. 
Just replace it with the trunc + DCI.CombineTo(N, Val, false); + } // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and if (Val.getOpcode() == ISD::ANY_EXTEND) { AExt = Val; @@ -5266,6 +5378,7 @@ static SDValue PerformSETCCCombine(SDNode *N, static SDValue PerformEXTRACTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue Vector = N->getOperand(0); + SDLoc DL(N); EVT VectorVT = Vector.getValueType(); if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && IsPTXVectorType(VectorVT.getSimpleVT())) @@ -5286,7 +5399,6 @@ static SDValue PerformEXTRACTCombine(SDNode *N, if (!Index || Index->getZExtValue() == 0) return SDValue(); - SDLoc DL(N); MVT IVT = MVT::getIntegerVT(VectorBits); EVT EltVT = VectorVT.getVectorElementType(); @@ -5309,6 +5421,38 @@ static SDValue PerformEXTRACTCombine(SDNode *N, return Result; } +static SDValue PerformVSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue VA = N->getOperand(1); + EVT VectorVT = VA.getValueType(); + if (VectorVT != MVT::v4i8) + return SDValue(); + + // We need to split vselect into individual per-element operations. Because + // we use BFE/BFI instructions for byte extraction/insertion, we end up with + // 32-bit values, so we may as well do the comparison as i32 to avoid + // conversions to/from i16 normally used for i8 values. + SmallVector E; + SDLoc DL(N); + SDValue VCond = N->getOperand(0); + SDValue VB = N->getOperand(2); + for (int I = 0; I < 4; ++I) { + SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond, + DCI.DAG.getConstant(I, DL, MVT::i32)); + SDValue EA = DCI.DAG.getAnyExtOrTrunc( + DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA, + DCI.DAG.getConstant(I, DL, MVT::i32)), + DL, MVT::i32); + SDValue EB = DCI.DAG.getAnyExtOrTrunc( + DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB, + DCI.DAG.getConstant(I, DL, MVT::i32)), + DL, MVT::i32); + E.push_back(DCI.DAG.getAnyExtOrTrunc( + DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8)); + } + return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E); +} + SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel(); @@ -5334,6 +5478,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, return PerformStoreRetvalCombine(N); case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACTCombine(N, DCI); + case ISD::VSELECT: + return PerformVSELECTCombine(N, DCI); } return SDValue(); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index cd1985cc4219bdf..5c7c10965e2f2ca 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -57,6 +57,9 @@ enum NodeType : unsigned { MUL_WIDE_UNSIGNED, IMAD, SETP_F16X2, + BFE, + BFI, + PRMT, Dummy, LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -590,6 +593,8 @@ class NVPTXTargetLowering : public TargetLowering { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 
307963aaa800b88..2a34d050ed8f707 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -107,6 +107,21 @@ def VecElement : Operand { let PrintMethod = "printVecElement"; } +// PRMT modes +// These must match the enum in NVPTX.h +def PrmtNONE : PatLeaf<(i32 0x0)>; +def PrmtF4E : PatLeaf<(i32 0x1)>; +def PrmtB4E : PatLeaf<(i32 0x2)>; +def PrmtRC8 : PatLeaf<(i32 0x3)>; +def PrmtECL : PatLeaf<(i32 0x4)>; +def PrmtECR : PatLeaf<(i32 0x5)>; +def PrmtRC16 : PatLeaf<(i32 0x6)>; + +def PrmtMode : Operand { + let PrintMethod = "printPrmtMode"; +} + + //===----------------------------------------------------------------------===// // NVPTX Instruction Predicate Definitions //===----------------------------------------------------------------------===// @@ -742,7 +757,7 @@ defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>; // def v2f16imm : Operand; // defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>; -foreach vt = [v2f16, v2bf16, v2i16] in { +foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { def : Pat<(vt (select Int1Regs:$p, (vt Int32Regs:$a), (vt Int32Regs:$b))), (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>; } @@ -1738,46 +1753,119 @@ def FUNSHFRCLAMP : // restriction in PTX? // // dest and src may be int32 or int64, but start and end are always int32. -multiclass BFE { +def SDTBFE : + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>; + +def SDTBFI : + SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def bfi : SDNode<"NVPTXISD::BFI", SDTBFI>; + +def SDTPRMT : + SDTypeProfile<1, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>,]>; +def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>; + +multiclass BFE { def rrr : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c), - !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; + !strconcat(Instr, " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>; def rri : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, i32imm:$c), - !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; + !strconcat(Instr, " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 imm:$c)))]>; def rii : NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c), - !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; + !strconcat(Instr, " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (bfe (T RC:$a), (i32 imm:$b), (i32 imm:$c)))]>; } -multiclass BFI { - def rrr + +multiclass BFI { + def rrrr : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), - !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; - def rri + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>; + def rrri : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d), - !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; - def rii + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>; + def rrii : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d), - !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; + def irrr + : NVPTXInst<(outs RC:$f), + (ins ImmCls:$a, RC:$b, 
Int32Regs:$c, Int32Regs:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>; + def irri + : NVPTXInst<(outs RC:$f), + (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>; + def irii + : NVPTXInst<(outs RC:$f), + (ins ImmCls:$a, RC:$b, i32imm:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; +} + +multiclass PRMT { + def rrr + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, Int32Regs:$c, i32imm:$mode), + !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>; + def rri + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, i32imm:$c, i32imm:$mode), + !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>; + def rii + : NVPTXInst<(outs RC:$d), + (ins RC:$a, i32imm:$b, i32imm:$c, i32imm:$mode), + !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>; } let hasSideEffects = false in { - defm BFE_S32 : BFE<"bfe.s32", Int32Regs>; - defm BFE_U32 : BFE<"bfe.u32", Int32Regs>; - defm BFE_S64 : BFE<"bfe.s64", Int64Regs>; - defm BFE_U64 : BFE<"bfe.u64", Int64Regs>; + defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>; + defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>; + defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>; + defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>; - defm BFI_B32 : BFI<"bfi.b32", Int32Regs>; - defm BFI_B64 : BFI<"bfi.b64", Int64Regs>; + defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>; + defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>; + + defm PRMT_B32 : PRMT; } -// Common byte extraction patterns + +// byte extraction + signed/unsigned extension to i32. 
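Alongside the extraction patterns that follow, the insertion side can be modeled the same way. A rough C++ sketch of bfi.b32 for in-range operands (assuming len < 32; hardware clamping is ignored), plus the byte-at-a-time v4i8 construction the new lowering chains out of it:

#include <cstdint>

// bfi.b32 d, a, b, pos, len: insert the low `len` bits of `a` into `b`
// at bit position `pos`; all other bits of `b` pass through unchanged.
static uint32_t bfi(uint32_t a, uint32_t b, uint32_t pos, uint32_t len) {
  uint32_t mask = ((1u << len) - 1) << pos;
  return (b & ~mask) | ((a << pos) & mask);
}

// Non-constant <4 x i8> construction as in LowerBUILD_VECTOR: start from
// lane 0 and insert each remaining lane one byte at a time.
static uint32_t buildV4I8(uint32_t e0, uint32_t e1, uint32_t e2, uint32_t e3) {
  uint32_t v = bfi(e1, e0, 8, 8); // lanes {1,0}
  v = bfi(e2, v, 16, 8);          // lanes {2,1,0}
  return bfi(e3, v, 24, 8);       // lanes {3,2,1,0}
}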
+def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), i8)), + (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>; +def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), i8)), + (BFE_S32rii Int32Regs:$s, imm:$o, 8)>; +def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), 255)), + (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>; +def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), 255)), + (BFE_U32rii Int32Regs:$s, imm:$o, 8)>; + +// byte extraction + signed extension to i16 +def : Pat<(i16 (sext_inreg (trunc (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8)), i8)), + (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>; + + +// Byte extraction via shift/trunc/sext def : Pat<(i16 (sext_inreg (trunc Int32Regs:$s), i8)), (CVT_s8_s32 Int32Regs:$s, CvtNONE)>; def : Pat<(i16 (sext_inreg (trunc (srl (i32 Int32Regs:$s), (i32 imm:$o))), i8)), @@ -1786,7 +1874,6 @@ def : Pat<(sext_inreg (srl (i32 Int32Regs:$s), (i32 imm:$o)), i8), (BFE_S32rii Int32Regs:$s, imm:$o, 8)>; def : Pat<(i16 (sra (i16 (trunc Int32Regs:$s)), (i32 8))), (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>; - def : Pat<(sext_inreg (srl (i64 Int64Regs:$s), (i32 imm:$o)), i8), (BFE_S64rii Int64Regs:$s, imm:$o, 8)>; def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), @@ -1794,24 +1881,6 @@ def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)), (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>; -def ExtractFromV4I8 : ComplexPattern; -def: Pat<(i32 (sext_inreg (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), i8)), - (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8)>; -def: Pat<(i32 (and (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), 255)), - (BFE_U32rii Int32Regs:$src, imm:$bitidx, 8)>; -def: Pat<(i16 (sext_inreg (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), i8)), - (CVT_s8_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; -def: Pat<(ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), - (CVT_s16_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; - - -def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), - (i16 Int16Regs:$c), (i16 Int16Regs:$d))), - (BFI_B32rii - (BFI_B32rii (CVT_u32_u16 Int16Regs:$d, CvtNONE), (CVT_u32_u16 Int16Regs:$c, CvtNONE), 8, 8), - (BFI_B32rii (CVT_u32_u16 Int16Regs:$b, CvtNONE), (CVT_u32_u16 Int16Regs:$a, CvtNONE), 8, 8), - 16, 16)>; - //----------------------------------- // Comparison instructions (setp, set) //----------------------------------- @@ -2141,6 +2210,29 @@ def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +// comparisons of i8 extracted with BFE as i32 +def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>; +def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>; +def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>; +def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>; + +def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, 
Int32Regs:$b, CmpGTU)>; +def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpGEU)>; +def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLTU)>; +def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLEU)>; +def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>; +def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>; + // i1 compare -> i32 def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll new file mode 100644 index 000000000000000..3b13ac02a7b923b --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -0,0 +1,1237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; ## Support i16x2 instructions +; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \ +; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ +; RUN: | FileCheck -allow-deprecated-dag-overlap %s +; RUN: %if ptxas %{ \ +; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 \ +; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ +; RUN: | %ptxas-verify -arch=sm_90 \ +; RUN: %} + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +define <4 x i8> @test_ret_const() #0 { +; CHECK-LABEL: test_ret_const( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u32 %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + ret <4 x i8> +} + +define i8 @test_extract_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_0_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i32 0 + ret i8 %e +} + +define i8 @test_extract_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_1_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 8, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i32 1 + ret i8 %e +} + +define i8 @test_extract_2(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_2_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 16, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i32 2 + ret i8 %e +} + +define i8 @test_extract_3(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_3_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x 
i8> %a, i32 3 + ret i8 %e +} + +define i8 @test_extract_i(<4 x i8> %a, i64 %idx) #0 { +; CHECK-LABEL: test_extract_i( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_extract_i_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_i_param_0]; +; CHECK-NEXT: cvt.u32.u64 %r2, %rd1; +; CHECK-NEXT: shl.b32 %r3, %r2, 3; +; CHECK-NEXT: bfe.u32 %r4, %r1, %r3, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i64 %idx + ret i8 %e +} + +define <4 x i8> @test_add(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_add( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_add_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_add_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; +; CHECK-NEXT: add.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: add.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r11; +; CHECK-NEXT: add.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs11, %r15; +; CHECK-NEXT: add.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %r = add <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_add_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_add_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<9>; +; CHECK-NEXT: .reg .b32 %r<14>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_add_imm_0_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; +; CHECK-NEXT: add.s16 %rs2, %rs1, 1; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; +; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; +; CHECK-NEXT: add.s16 %rs4, %rs3, 2; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; +; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: add.s16 %rs6, %rs5, 3; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: add.s16 %rs8, %rs7, 4; +; CHECK-NEXT: cvt.u32.u16 %r11, %rs8; +; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12; +; CHECK-NEXT: ret; + %r = add <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_add_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_add_imm_1( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<9>; +; CHECK-NEXT: .reg .b32 %r<14>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_add_imm_1_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 
0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; +; CHECK-NEXT: add.s16 %rs2, %rs1, 1; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; +; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; +; CHECK-NEXT: add.s16 %rs4, %rs3, 2; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; +; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: add.s16 %rs6, %rs5, 3; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: add.s16 %rs8, %rs7, 4; +; CHECK-NEXT: cvt.u32.u16 %r11, %rs8; +; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12; +; CHECK-NEXT: ret; + %r = add <4 x i8> %a, + ret <4 x i8> %r +} + +define <4 x i8> @test_sub(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_sub( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_sub_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_sub_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; +; CHECK-NEXT: sub.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: sub.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r11; +; CHECK-NEXT: sub.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs11, %r15; +; CHECK-NEXT: sub.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %r = sub <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_smax( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_smax_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_smax_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.gt.s32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.gt.s32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.gt.s32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.gt.s32 %p4, %r9, %r10; +; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: 
ret; + %cmp = icmp sgt <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_umax( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_umax_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_umax_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.gtu.u32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.gtu.u32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.gtu.u32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.gtu.u32 %p4, %r9, %r10; +; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %cmp = icmp ugt <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_smin( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_smin_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_smin_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.le.s32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.le.s32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.le.s32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.le.s32 %p4, %r9, %r10; +; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %cmp = icmp sle <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_umin( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_umin_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_umin_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.leu.u32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.leu.u32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.leu.u32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.leu.u32 %p4, %r9, %r10; +; CHECK-NEXT: 
selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %cmp = icmp ule <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_eq(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 { +; CHECK-LABEL: test_eq( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<24>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_eq_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_eq_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_eq_param_0]; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r5, %r1, 24, 8; +; CHECK-NEXT: setp.eq.u32 %p1, %r5, %r4; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: setp.eq.u32 %p2, %r7, %r6; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: setp.eq.u32 %p3, %r9, %r8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r11, %r1, 0, 8; +; CHECK-NEXT: setp.eq.u32 %p4, %r11, %r10; +; CHECK-NEXT: bfe.s32 %r12, %r3, 0, 8; +; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4; +; CHECK-NEXT: bfe.s32 %r14, %r3, 8, 8; +; CHECK-NEXT: selp.b32 %r15, %r9, %r14, %p3; +; CHECK-NEXT: bfi.b32 %r16, %r15, %r13, 8, 8; +; CHECK-NEXT: bfe.s32 %r17, %r3, 16, 8; +; CHECK-NEXT: selp.b32 %r18, %r7, %r17, %p2; +; CHECK-NEXT: bfi.b32 %r19, %r18, %r16, 16, 8; +; CHECK-NEXT: bfe.s32 %r20, %r3, 24, 8; +; CHECK-NEXT: selp.b32 %r21, %r5, %r20, %p1; +; CHECK-NEXT: bfi.b32 %r22, %r21, %r19, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r22; +; CHECK-NEXT: ret; + %cmp = icmp eq <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %c + ret <4 x i8> %r +} + +define <4 x i8> @test_ne(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 { +; CHECK-LABEL: test_ne( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<24>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_ne_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_ne_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_ne_param_0]; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r5, %r1, 24, 8; +; CHECK-NEXT: setp.ne.u32 %p1, %r5, %r4; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: setp.ne.u32 %p2, %r7, %r6; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: setp.ne.u32 %p3, %r9, %r8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r11, %r1, 0, 8; +; CHECK-NEXT: setp.ne.u32 %p4, %r11, %r10; +; CHECK-NEXT: bfe.s32 %r12, %r3, 0, 8; +; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4; +; CHECK-NEXT: bfe.s32 %r14, %r3, 8, 8; +; CHECK-NEXT: selp.b32 %r15, %r9, %r14, %p3; +; CHECK-NEXT: bfi.b32 %r16, %r15, %r13, 8, 8; +; CHECK-NEXT: bfe.s32 %r17, %r3, 16, 8; +; CHECK-NEXT: selp.b32 %r18, %r7, %r17, %p2; +; CHECK-NEXT: bfi.b32 %r19, %r18, %r16, 16, 8; +; CHECK-NEXT: bfe.s32 %r20, %r3, 24, 8; +; CHECK-NEXT: selp.b32 %r21, %r5, %r20, %p1; +; CHECK-NEXT: bfi.b32 %r22, %r21, %r19, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r22; +; CHECK-NEXT: ret; + %cmp = icmp ne <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %c + ret <4 x i8> %r +} + 
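+; Editorial note, a sketch rather than part of the original test file: PTX has
+; no native <4 x i8> ALU instructions, so each arithmetic test in this file
+; follows the same per-lane shape seen above and below: bfe.s32 extracts one
+; byte at bit offset 0/8/16/24, the scalar op runs in a 16- or 32-bit
+; register, and bfi.b32 re-inserts the result byte into the packed b32 value.
+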
+define <4 x i8> @test_mul(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_mul( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_mul_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_mul_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; +; CHECK-NEXT: mul.lo.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: mul.lo.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r11; +; CHECK-NEXT: mul.lo.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs11, %r15; +; CHECK-NEXT: mul.lo.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %r = mul <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_or(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_or( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_or_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [test_or_param_0]; +; CHECK-NEXT: or.b32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: ret; + %r = or <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_or_computed(i8 %a) { +; CHECK-LABEL: test_or_computed( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_or_computed_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 0, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 0, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8; +; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8; +; CHECK-NEXT: or.b32 %r8, %r6, %r4; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %r = or <4 x i8> %ins.1, %ins.0 + ret <4 x i8> %r +} + +define <4 x i8> @test_or_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_or_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_or_imm_0_param_0]; +; CHECK-NEXT: or.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = or <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_or_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_or_imm_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_or_imm_1_param_0]; +; CHECK-NEXT: or.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = or <4 x i8> %a, + ret <4 x i8> %r +} + +define <4 x i8> @test_xor(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: 
test_xor( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_xor_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [test_xor_param_0]; +; CHECK-NEXT: xor.b32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: ret; + %r = xor <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_xor_computed(i8 %a) { +; CHECK-LABEL: test_xor_computed( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_xor_computed_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 0, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 0, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8; +; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8; +; CHECK-NEXT: xor.b32 %r8, %r6, %r4; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %r = xor <4 x i8> %ins.1, %ins.0 + ret <4 x i8> %r +} + +define <4 x i8> @test_xor_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_xor_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_xor_imm_0_param_0]; +; CHECK-NEXT: xor.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = xor <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_xor_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_xor_imm_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_xor_imm_1_param_0]; +; CHECK-NEXT: xor.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = xor <4 x i8> %a, + ret <4 x i8> %r +} + +define <4 x i8> @test_and(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_and( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_and_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [test_and_param_0]; +; CHECK-NEXT: and.b32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: ret; + %r = and <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_and_computed(i8 %a) { +; CHECK-LABEL: test_and_computed( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_and_computed_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 0, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 0, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8; +; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8; +; CHECK-NEXT: and.b32 %r8, %r6, %r4; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %r = and <4 x i8> %ins.1, %ins.0 + ret <4 x i8> %r +} + +define <4 x i8> @test_and_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_and_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_and_imm_0_param_0]; +; CHECK-NEXT: and.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = and <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_and_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_and_imm_1( +; 
CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_and_imm_1_param_0]; +; CHECK-NEXT: and.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = and <4 x i8> %a, + ret <4 x i8> %r +} + +define void @test_ldst_v2i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v2i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v2i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v2i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: st.u32 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <4 x i8>, ptr %a + store <4 x i8> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v3i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v3i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v3i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v3i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: st.u16 [%rd2], %r1; +; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; +; CHECK-NEXT: st.u8 [%rd2+2], %r3; +; CHECK-NEXT: ret; + %t1 = load <3 x i8>, ptr %a + store <3 x i8> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v4i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: st.u32 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <4 x i8>, ptr %a + store <4 x i8> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v8i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v8i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v8i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v8i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd1+4]; +; CHECK-NEXT: st.u32 [%rd2+4], %r2; +; CHECK-NEXT: st.u32 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <8 x i8>, ptr %a + store <8 x i8> %t1, ptr %b, align 16 + ret void +} + +declare <4 x i8> @test_callee(<4 x i8> %a, <4 x i8> %b) #0 + +define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_call( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_call_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_call_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .reg .b32 temp_param_reg; +; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: st.param.b32 [param0+0], %r1; +; CHECK-NEXT: .param .align 4 .b8 param1[4]; +; CHECK-NEXT: st.param.b32 [param1+0], %r2; +; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: test_callee, +; CHECK-NEXT: ( +; CHECK-NEXT: param0, +; CHECK-NEXT: param1 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.b32 %r3, [retval0+0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = call <4 x i8> @test_callee(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %r +} + +define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_call_flipped( +; CHECK: { +; 
CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_call_flipped_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_call_flipped_param_0]; +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .reg .b32 temp_param_reg; +; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: st.param.b32 [param0+0], %r2; +; CHECK-NEXT: .param .align 4 .b8 param1[4]; +; CHECK-NEXT: st.param.b32 [param1+0], %r1; +; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: test_callee, +; CHECK-NEXT: ( +; CHECK-NEXT: param0, +; CHECK-NEXT: param1 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.b32 %r3, [retval0+0]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = call <4 x i8> @test_callee(<4 x i8> %b, <4 x i8> %a) + ret <4 x i8> %r +} + +define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_tailcall_flipped( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_tailcall_flipped_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_tailcall_flipped_param_0]; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .reg .b32 temp_param_reg; +; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: st.param.b32 [param0+0], %r2; +; CHECK-NEXT: .param .align 4 .b8 param1[4]; +; CHECK-NEXT: st.param.b32 [param1+0], %r1; +; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: test_callee, +; CHECK-NEXT: ( +; CHECK-NEXT: param0, +; CHECK-NEXT: param1 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.b32 %r3, [retval0+0]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = tail call <4 x i8> @test_callee(<4 x i8> %b, <4 x i8> %a) + ret <4 x i8> %r +} + +define <4 x i8> @test_select(<4 x i8> %a, <4 x i8> %b, i1 zeroext %c) #0 { +; CHECK-LABEL: test_select( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2]; +; CHECK-NEXT: and.b16 %rs2, %rs1, 1; +; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1; +; CHECK-NEXT: ld.param.u32 %r2, [test_select_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_select_param_0]; +; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = select i1 %c, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_select_cc(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) #0 { +; CHECK-LABEL: test_select_cc( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<29>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r4, [test_select_cc_param_3]; +; CHECK-NEXT: ld.param.u32 %r3, [test_select_cc_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_select_cc_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_select_cc_param_0]; +; CHECK-NEXT: bfe.s32 %r5, %r4, 24, 8; +; CHECK-NEXT: bfe.s32 %r6, %r3, 24, 8; +; CHECK-NEXT: setp.ne.u32 %p1, %r6, %r5; +; CHECK-NEXT: bfe.s32 %r7, %r4, 16, 8; +; CHECK-NEXT: bfe.s32 %r8, %r3, 16, 8; +; CHECK-NEXT: setp.ne.u32 %p2, %r8, %r7; +; CHECK-NEXT: bfe.s32 %r9, %r4, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r3, 8, 8; +; CHECK-NEXT: setp.ne.u32 %p3, %r10, %r9; +; CHECK-NEXT: bfe.s32 %r11, %r4, 0, 8; +; CHECK-NEXT: bfe.s32 %r12, %r3, 0, 8; +; CHECK-NEXT: setp.ne.u32 %p4, %r12, %r11; +; CHECK-NEXT: bfe.s32 
%r13, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r14, %r1, 0, 8; +; CHECK-NEXT: selp.b32 %r15, %r14, %r13, %p4; +; CHECK-NEXT: bfe.s32 %r16, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r17, %r1, 8, 8; +; CHECK-NEXT: selp.b32 %r18, %r17, %r16, %p3; +; CHECK-NEXT: bfi.b32 %r19, %r18, %r15, 8, 8; +; CHECK-NEXT: bfe.s32 %r20, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r21, %r1, 16, 8; +; CHECK-NEXT: selp.b32 %r22, %r21, %r20, %p2; +; CHECK-NEXT: bfi.b32 %r23, %r22, %r19, 16, 8; +; CHECK-NEXT: bfe.s32 %r24, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r25, %r1, 24, 8; +; CHECK-NEXT: selp.b32 %r26, %r25, %r24, %p1; +; CHECK-NEXT: bfi.b32 %r27, %r26, %r23, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r27; +; CHECK-NEXT: ret; + %cc = icmp ne <4 x i8> %c, %d + %r = select <4 x i1> %cc, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i32> @test_select_cc_i32_i8(<4 x i32> %a, <4 x i32> %b, +; CHECK-LABEL: test_select_cc_i32_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<23>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [test_select_cc_i32_i8_param_1]; +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [test_select_cc_i32_i8_param_0]; +; CHECK-NEXT: ld.param.u32 %r10, [test_select_cc_i32_i8_param_3]; +; CHECK-NEXT: ld.param.u32 %r9, [test_select_cc_i32_i8_param_2]; +; CHECK-NEXT: bfe.s32 %r11, %r10, 0, 8; +; CHECK-NEXT: bfe.s32 %r12, %r9, 0, 8; +; CHECK-NEXT: setp.ne.u32 %p1, %r12, %r11; +; CHECK-NEXT: bfe.s32 %r13, %r10, 8, 8; +; CHECK-NEXT: bfe.s32 %r14, %r9, 8, 8; +; CHECK-NEXT: setp.ne.u32 %p2, %r14, %r13; +; CHECK-NEXT: bfe.s32 %r15, %r10, 16, 8; +; CHECK-NEXT: bfe.s32 %r16, %r9, 16, 8; +; CHECK-NEXT: setp.ne.u32 %p3, %r16, %r15; +; CHECK-NEXT: bfe.s32 %r17, %r10, 24, 8; +; CHECK-NEXT: bfe.s32 %r18, %r9, 24, 8; +; CHECK-NEXT: setp.ne.u32 %p4, %r18, %r17; +; CHECK-NEXT: selp.b32 %r19, %r4, %r8, %p4; +; CHECK-NEXT: selp.b32 %r20, %r3, %r7, %p3; +; CHECK-NEXT: selp.b32 %r21, %r2, %r6, %p2; +; CHECK-NEXT: selp.b32 %r22, %r1, %r5, %p1; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r22, %r21, %r20, %r19}; +; CHECK-NEXT: ret; + <4 x i8> %c, <4 x i8> %d) #0 { + %cc = icmp ne <4 x i8> %c, %d + %r = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %r +} + +define <4 x i8> @test_select_cc_i8_i32(<4 x i8> %a, <4 x i8> %b, +; CHECK-LABEL: test_select_cc_i8_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<27>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_select_cc_i8_i32_param_3]; +; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_select_cc_i8_i32_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_select_cc_i8_i32_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_select_cc_i8_i32_param_0]; +; CHECK-NEXT: setp.ne.s32 %p1, %r6, %r10; +; CHECK-NEXT: setp.ne.s32 %p2, %r5, %r9; +; CHECK-NEXT: setp.ne.s32 %p3, %r4, %r8; +; CHECK-NEXT: setp.ne.s32 %p4, %r3, %r7; +; CHECK-NEXT: bfe.s32 %r11, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r12, %r1, 0, 8; +; CHECK-NEXT: selp.b32 %r13, %r12, %r11, %p4; +; CHECK-NEXT: bfe.s32 %r14, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r15, %r1, 8, 8; +; CHECK-NEXT: selp.b32 %r16, %r15, %r14, %p3; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 8, 8; +; CHECK-NEXT: bfe.s32 %r18, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r19, %r1, 16, 8; +; CHECK-NEXT: selp.b32 %r20, %r19, %r18, %p2; +; CHECK-NEXT: bfi.b32 %r21, %r20, %r17, 16, 8; +; CHECK-NEXT: bfe.s32 %r22, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r23, %r1, 24, 8; +; CHECK-NEXT: selp.b32 %r24, 
%r23, %r22, %p1; +; CHECK-NEXT: bfi.b32 %r25, %r24, %r21, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r25; +; CHECK-NEXT: ret; + <4 x i32> %c, <4 x i32> %d) #0 { + %cc = icmp ne <4 x i32> %c, %d + %r = select <4 x i1> %cc, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + + +define <4 x i8> @test_trunc_2xi32(<4 x i32> %a) #0 { +; CHECK-LABEL: test_trunc_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [test_trunc_2xi32_param_0]; +; CHECK-NEXT: bfi.b32 %r5, %r2, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r6, %r3, %r5, 16, 8; +; CHECK-NEXT: bfi.b32 %r7, %r4, %r6, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7; +; CHECK-NEXT: ret; + %r = trunc <4 x i32> %a to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i8> @test_trunc_2xi64(<4 x i64> %a) #0 { +; CHECK-LABEL: test_trunc_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [test_trunc_2xi64_param_0+16]; +; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [test_trunc_2xi64_param_0]; +; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; +; CHECK-NEXT: cvt.u32.u64 %r2, %rd2; +; CHECK-NEXT: bfi.b32 %r3, %r2, %r1, 8, 8; +; CHECK-NEXT: cvt.u32.u64 %r4, %rd3; +; CHECK-NEXT: bfi.b32 %r5, %r4, %r3, 16, 8; +; CHECK-NEXT: cvt.u32.u64 %r6, %rd4; +; CHECK-NEXT: bfi.b32 %r7, %r6, %r5, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7; +; CHECK-NEXT: ret; + %r = trunc <4 x i64> %a to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i32> @test_zext_2xi32(<4 x i8> %a) #0 { +; CHECK-LABEL: test_zext_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_zext_2xi32_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; +; CHECK-NEXT: bfe.u32 %r3, %r1, 16, 8; +; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8; +; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r5, %r4, %r3, %r2}; +; CHECK-NEXT: ret; + %r = zext <4 x i8> %a to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i64> @test_zext_2xi64(<4 x i8> %a) #0 { +; CHECK-LABEL: test_zext_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_zext_2xi64_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 24, 8; +; CHECK-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NEXT: and.b64 %rd2, %rd1, 255; +; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; +; CHECK-NEXT: cvt.u64.u32 %rd3, %r3; +; CHECK-NEXT: and.b64 %rd4, %rd3, 255; +; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8; +; CHECK-NEXT: cvt.u64.u32 %rd5, %r4; +; CHECK-NEXT: and.b64 %rd6, %rd5, 255; +; CHECK-NEXT: bfe.s32 %r5, %r1, 0, 8; +; CHECK-NEXT: cvt.u64.u32 %rd7, %r5; +; CHECK-NEXT: and.b64 %rd8, %rd7, 255; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd8, %rd6}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd4, %rd2}; +; CHECK-NEXT: ret; + %r = zext <4 x i8> %a to <4 x i64> + ret <4 x i64> %r +} + +define <4 x i8> @test_bitcast_i32_to_2xi8(i32 %a) #0 { +; CHECK-LABEL: test_bitcast_i32_to_2xi8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_bitcast_i32_to_2xi8_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %r = bitcast i32 %a to <4 x i8> + ret <4 x i8> %r +} + +define i32 @test_bitcast_2xi8_to_i32(<4 x i8> %a) #0 { +; CHECK-LABEL: test_bitcast_2xi8_to_i32( +; CHECK: { +; 
CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_bitcast_2xi8_to_i32_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = bitcast <4 x i8> %a to i32 + ret i32 %r +} + +define <2 x half> @test_bitcast_2xi8_to_2xhalf(i8 %a) #0 { +; CHECK-LABEL: test_bitcast_2xi8_to_2xhalf( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_bitcast_2xi8_to_2xhalf_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 5, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 6, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 7, %r3, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> undef, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %ins.2 = insertelement <4 x i8> %ins.1, i8 6, i32 2 + %ins.3 = insertelement <4 x i8> %ins.2, i8 7, i32 3 + %r = bitcast <4 x i8> %ins.3 to <2 x half> + ret <2 x half> %r +} + + +define <4 x i8> @test_shufflevector(<4 x i8> %a) #0 { +; CHECK-LABEL: test_shufflevector( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, %r3, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r5, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r6, %r5, %r4, 16, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 0, 8; +; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %s = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> + ret <4 x i8> %s +} + +define <4 x i8> @test_insertelement(<4 x i8> %a, i8 %x) #0 { +; CHECK-LABEL: test_insertelement( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_insertelement_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_insertelement_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r2, %rs1; +; CHECK-NEXT: bfi.b32 %r3, %r2, %r1, 8, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %i = insertelement <4 x i8> %a, i8 %x, i64 1 + ret <4 x i8> %i +} + +define <4 x i8> @test_fptosi_2xhalf_to_2xi8(<4 x half> %a) #0 { +; CHECK-LABEL: test_fptosi_2xhalf_to_2xi8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u32 {%r3, %r4}, [test_fptosi_2xhalf_to_2xi8_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs3, %rs2; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs4, %rs1; +; CHECK-NEXT: mov.b32 %r5, {%rs4, %rs3}; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5; +; CHECK-NEXT: cvt.u32.u16 %r6, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs6; +; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 8, 8; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs9, %rs8; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs10, %rs7; +; CHECK-NEXT: mov.b32 %r9, {%rs10, %rs9}; +; CHECK-NEXT: mov.b32 {%rs11, %rs12}, %r9; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs11; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs12; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13; +; CHECK-NEXT: ret; + %r = fptosi <4 x half> %a to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i8> @test_fptoui_2xhalf_to_2xi8(<4 x 
half> %a) #0 { +; CHECK-LABEL: test_fptoui_2xhalf_to_2xi8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u32 {%r3, %r4}, [test_fptoui_2xhalf_to_2xi8_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs3, %rs2; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs4, %rs1; +; CHECK-NEXT: mov.b32 %r5, {%rs4, %rs3}; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5; +; CHECK-NEXT: cvt.u32.u16 %r6, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs6; +; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 8, 8; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs9, %rs8; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs10, %rs7; +; CHECK-NEXT: mov.b32 %r9, {%rs10, %rs9}; +; CHECK-NEXT: mov.b32 {%rs11, %rs12}, %r9; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs11; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs12; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13; +; CHECK-NEXT: ret; + %r = fptoui <4 x half> %a to <4 x i8> + ret <4 x i8> %r +} + +attributes #0 = { nounwind } >From 655c6d5bef8f016335643ad75465d22e216168e0 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 5 Oct 2023 14:22:16 -0700 Subject: [PATCH 4/8] Added vector_shuffle lowering to PRMT. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 17 +++++++++++- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 6 ++-- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 29 ++++++++++++++------ 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b886b6e2ce5ddde..701d9912150d955 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2306,7 +2306,22 @@ SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { - return SDValue(); + SDValue V1 = Op.getOperand(0); + EVT VectorVT = V1.getValueType(); + if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8) + return Op; + + // Lower shuffle to PRMT instruction. 
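+  // Editorial sketch, assuming PRMT's standard byte-selector encoding rather
+  // than restating the author's rationale: low nibble i of the selector picks
+  // result byte i (values 0-3 address V1's bytes, 4-7 address V2's), so
+  // mask <3,2,1,0> packs to 0x0123 (291) and mask <7,3,5,2> to 0x2537 (9527),
+  // the selector immediates that appear in the updated tests further down.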
+ const ShuffleVectorSDNode *SVN = cast(Op.getNode()); + SDValue V2 = Op.getOperand(1); + uint32_t Selector = 0; + for (auto I: llvm::enumerate(SVN->getMask())) + Selector |= (I.value() << (I.index() * 4)); + + SDLoc DL(Op); + return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2, + DAG.getConstant(Selector, DL, MVT::i32), + DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32)); } /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 2a34d050ed8f707..9d0bcbf3e8f50dc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1822,17 +1822,17 @@ multiclass BFI { multiclass PRMT { def rrr : NVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, Int32Regs:$c, i32imm:$mode), + (ins RC:$a, Int32Regs:$b, Int32Regs:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>; def rri : NVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, i32imm:$c, i32imm:$mode), + (ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>; def rii : NVPTXInst<(outs RC:$d), - (ins RC:$a, i32imm:$b, i32imm:$c, i32imm:$mode), + (ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>; } diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 3b13ac02a7b923b..97e33c2f7eefc26 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1138,23 +1138,34 @@ define <2 x half> @test_bitcast_2xi8_to_2xhalf(i8 %a) #0 { define <4 x i8> @test_shufflevector(<4 x i8> %a) #0 { ; CHECK-LABEL: test_shufflevector( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_param_0]; -; CHECK-NEXT: bfe.s32 %r2, %r1, 24, 8; -; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; -; CHECK-NEXT: bfi.b32 %r4, %r3, %r2, 8, 8; -; CHECK-NEXT: bfe.s32 %r5, %r1, 8, 8; -; CHECK-NEXT: bfi.b32 %r6, %r5, %r4, 16, 8; -; CHECK-NEXT: bfe.s32 %r7, %r1, 0, 8; -; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 24, 8; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: // implicit-def: %r3 +; CHECK-NEXT: prmt.b32 %r2, %r1, %r3, 291; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; ; CHECK-NEXT: ret; %s = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> ret <4 x i8> %s } +define <4 x i8> @test_shufflevector_2(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_shufflevector_2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_shufflevector_2_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_2_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r1, %r2, 9527; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %s = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + ret <4 x i8> %s +} + + define <4 x i8> @test_insertelement(<4 x i8> %a, i8 %x) #0 { ; CHECK-LABEL: test_insertelement( ; CHECK: { >From f915e5b855ce969a234cf644413132fe1742fac0 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 5 Oct 
2023 14:44:01 -0700 Subject: [PATCH 5/8] Address clang-format complaints. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 701d9912150d955..da78eebb42ed0d9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -521,7 +521,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::BR_CC, VT, Expand); } - // Some SIGN_EXTEND_INREG can be done using cvt instruction. // For others we will expand to a SHL/SRA pair. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); @@ -2206,12 +2205,12 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); SDValue E012 = DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, - DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), E01, - DAG.getConstant(16, DL, MVT::i32), C8); + DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), + E01, DAG.getConstant(16, DL, MVT::i32), C8); SDValue E0123 = DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, - DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), E012, - DAG.getConstant(24, DL, MVT::i32), C8); + DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), + E012, DAG.getConstant(24, DL, MVT::i32), C8); return DAG.getNode(ISD::BITCAST, DL, VT, E0123); } return Op; @@ -5414,7 +5413,6 @@ static SDValue PerformEXTRACTCombine(SDNode *N, if (!Index || Index->getZExtValue() == 0) return SDValue(); - MVT IVT = MVT::getIntegerVT(VectorBits); EVT EltVT = VectorVT.getVectorElementType(); EVT EltIVT = EltVT.changeTypeToInteger(); >From ef3d5dee67581fd9b9644cf1e0ac54514ee4a884 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 5 Oct 2023 15:31:58 -0700 Subject: [PATCH 6/8] Use .lo/ls/hi/hs suffixes for unsigned setp instructions. Removed unused code. 
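For reference, a sketch of the mnemonic mapping involved (recalled from the
PTX ISA's integer setp comparison operators, so treat it as a hint rather
than spec text; the operand names below are illustrative):

  setp.lo.u32 %p, %a, %b;  // unsigned <,  previously printed as setp.ltu.u32
  setp.ls.u32 %p, %a, %b;  // unsigned <=, previously setp.leu.u32
  setp.hi.u32 %p, %a, %b;  // unsigned >,  previously setp.gtu.u32
  setp.hs.u32 %p, %a, %b;  // unsigned >=, previously setp.geu.u32

The ltu/leu/gtu/geu spellings are PTX's unordered floating-point comparisons,
which is presumably why the u32 patterns switch away from them here.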
--- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 18 ------------------ llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 - llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 12 ++++++++---- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 16 ++++++++-------- 5 files changed, 17 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index f442188610715ee..68391cdb6ff172b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -3570,24 +3570,6 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); } -bool NVPTXDAGToDAGISel::SelectExtractEltFromV4I8(SDValue N, SDValue &V, - SDValue &BitOffset) { - SDValue Vector = N->getOperand(0); - if (!(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Vector->getValueType(0) == MVT::v4i8)) - return false; - - SDLoc DL(N); - V = Vector; - SDValue Index = N->getOperand(1); - if (const ConstantSDNode *IdxConst = dyn_cast(Index)) { - BitOffset = - CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, DL, MVT::i32); - return true; - } - return false; -} - bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { const Value *Src = nullptr; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 34b5dd449ce086f..06922331f5e2059 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -95,7 +95,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { SDValue &Offset); bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectExtractEltFromV4I8(SDValue N, SDValue &Value, SDValue &Idx); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index da78eebb42ed0d9..8d7a29198d61a11 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2259,7 +2259,7 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, DAG.getZExtOrTrunc(Index, DL, MVT::i32), DAG.getConstant(8, DL, MVT::i32)), DAG.getConstant(8, DL, MVT::i32)}); - return DAG.getZExtOrTrunc(BFE, DL, Op->getValueType(0)); + return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0)); } // Constant index will be matched by tablegen. 
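 // (Editorial aside on the getZExtOrTrunc -> getAnyExtOrTrunc change above;
 // this is an inference, not the author's stated rationale: bfe.s32 already
 // sign-fills the upper bits of the 32-bit result, and i8 values travel in
 // wider registers whose high bits are unspecified, so any-extending avoids
 // forcing a redundant zero mask on every element extract.)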
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 9d0bcbf3e8f50dc..3c9d8167e689a56 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -76,6 +76,10 @@ def CmpLT : PatLeaf<(i32 2)>; def CmpLE : PatLeaf<(i32 3)>; def CmpGT : PatLeaf<(i32 4)>; def CmpGE : PatLeaf<(i32 5)>; +def CmpLO : PatLeaf<(i32 6)>; +def CmpLS : PatLeaf<(i32 7)>; +def CmpHI : PatLeaf<(i32 8)>; +def CmpHS : PatLeaf<(i32 9)>; def CmpEQU : PatLeaf<(i32 10)>; def CmpNEU : PatLeaf<(i32 11)>; def CmpLTU : PatLeaf<(i32 12)>; @@ -2221,13 +2225,13 @@ def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32R (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>; def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpGTU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>; def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpGEU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>; def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLTU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>; def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLEU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>; def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>; def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 97e33c2f7eefc26..641f2f36f95b353 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -293,16 +293,16 @@ define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: ld.param.u32 %r1, [test_umax_param_0]; ; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; ; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; -; CHECK-NEXT: setp.gtu.u32 %p1, %r3, %r4; +; CHECK-NEXT: setp.hi.u32 %p1, %r3, %r4; ; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; ; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; -; CHECK-NEXT: setp.gtu.u32 %p2, %r5, %r6; +; CHECK-NEXT: setp.hi.u32 %p2, %r5, %r6; ; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; ; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; -; CHECK-NEXT: setp.gtu.u32 %p3, %r7, %r8; +; CHECK-NEXT: setp.hi.u32 %p3, %r7, %r8; ; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; ; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; -; CHECK-NEXT: setp.gtu.u32 %p4, %r9, %r10; +; CHECK-NEXT: setp.hi.u32 %p4, %r9, %r10; ; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; ; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; ; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; @@ -363,16 +363,16 @@ define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: ld.param.u32 %r1, [test_umin_param_0]; ; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; ; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; -; CHECK-NEXT: setp.leu.u32 %p1, %r3, %r4; +; CHECK-NEXT: setp.ls.u32 %p1, %r3, %r4; ; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; ; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; -; CHECK-NEXT: setp.leu.u32 %p2, %r5, %r6; +; CHECK-NEXT: setp.ls.u32 %p2, %r5, %r6; ; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; ; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; -; CHECK-NEXT: setp.leu.u32 %p3, %r7, %r8; +; CHECK-NEXT: 
setp.ls.u32 %p3, %r7, %r8; ; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; ; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; -; CHECK-NEXT: setp.leu.u32 %p4, %r9, %r10; +; CHECK-NEXT: setp.ls.u32 %p4, %r9, %r10; ; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; ; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; ; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; >From 9821e908e676d8eedbee7c07c90fb5aae4454f82 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 6 Oct 2023 12:22:41 -0700 Subject: [PATCH 7/8] Fixed calculation of constant v4i8 values. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 ++++ llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8d7a29198d61a11..6a62e228e8efb39 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2229,6 +2229,10 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, Value = cast(Operand)->getAPIntValue(); else llvm_unreachable("Unsupported type"); + // i8 values are carried around as i16, so we need to zero out upper bits, + // so they do not get in the way of combining individual byte values + if (VT == MVT::v4i8) + Value = Value.trunc(8); return Value.zext(32); }; APInt Value; diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 641f2f36f95b353..c429bf23417f951 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -17,10 +17,10 @@ define <4 x i8> @test_ret_const() #0 { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u32 %r1, 67305985; +; CHECK-NEXT: mov.u32 %r1, -66911489; ; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; ; CHECK-NEXT: ret; - ret <4 x i8> + ret <4 x i8> } define i8 @test_extract_0(<4 x i8> %a) #0 { >From 3879bdb03da707fd0fc02e2f92d5c8733a52de1f Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 6 Oct 2023 14:17:39 -0700 Subject: [PATCH 8/8] Updated a test. --- .../CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll index 97b1e38a3388413..8633b09af04873c 100644 --- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll @@ -89,7 +89,7 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi ; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_undef_param_1]; ; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_undef_param_0]; ; CHECK-NEXT: and.b32 %r5, %r4, %r1; -; CHECK-NEXT: xor.b32 %r7, %r1, -1; +; CHECK-NEXT: xor.b32 %r7, %r1, -16711681; ; CHECK-NEXT: and.b32 %r8, %r3, %r7; ; CHECK-NEXT: or.b32 %r9, %r5, %r8; ; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9; From lldb-commits at lists.llvm.org Fri Oct 6 14:30:41 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:41 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. 
(PR #68452) In-Reply-To: Message-ID: <65207c81.170a0220.8d95d.ed86@mx.google.com> ================ @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", + m_fixed_expression.c_str()); ---------------- adrian-prantl wrote: You can use `error_stream << "..." << m_fixed_expression << "\n"` then you don't need to convert to c_str() https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:42 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c82.170a0220.97e56.cb97@mx.google.com> ================ @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", ---------------- adrian-prantl wrote: Is this printed before or after the output? If it's printed after, past tense might make more sense? https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:42 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c82.170a0220.603b1.e990@mx.google.com> ================ @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). ---------------- adrian-prantl wrote: I'm not sure I understand what the second sentence is trying to convey. 
https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:42 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:42 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c82.a70a0220.b10a1.63f0@mx.google.com> ================ @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_Expression_FixIt_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr -l c++ -- class C { int i; void f() { []() { ++i; }(); } }", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], ---------------- adrian-prantl wrote: ditto https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:43 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c83.170a0220.2c4cc.f2ea@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "po class C { int i; void f() { []() { ++i; }(); } }", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], ---------------- adrian-prantl wrote: maybe add `; 1` at the end, so it's not an error? https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:43 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c83.a70a0220.6c45a.680d@mx.google.com> ================ @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_Expression_FixIt_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." 
+ + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr -l c++ -- class C { int i; void f() { []() { ++i; }(); } }", ---------------- adrian-prantl wrote: the `-l c++` is redundant https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:43 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Fri, 06 Oct 2023 14:30:43 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c83.170a0220.d3f21.40e9@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "po class C { int i; void f() { []() { ++i; }(); } }", ---------------- adrian-prantl wrote: this should call the underlying `dwim-print -O --` command and not rely on the alias being there https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:30:49 2023 From: lldb-commits at lists.llvm.org (via lldb-commits) Date: Fri, 06 Oct 2023 14:30:49 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65207c89.170a0220.b4066.ed6c@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff daca97216cf132d733513f992d49e3c722aabf40 1d0ac08d38a33ae70687f7b125367c39fbcf92f3 -- lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp lldb/test/API/lang/cpp/expression-fixit/main.cpp lldb/source/Commands/CommandObjectDWIMPrint.cpp lldb/source/Commands/CommandObjectDWIMPrint.h lldb/source/Commands/CommandObjectExpression.cpp lldb/source/Commands/CommandObjectExpression.h ``````````
View the diff from clang-format here. ``````````diff diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 8a5be3e1cd1c..1f07cdf4c8a1 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -172,15 +172,17 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); - + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + // Only mention Fix-Its if the command applies them. - // The compiler errors can address any parsing issues after applying Fix-It(s). + // The compiler errors can address any parsing issues after applying + // Fix-It(s). if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { Stream &error_stream = result.GetErrorStream(); - error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", - m_fixed_expression.c_str()); + error_stream.Printf( + " Applying Fix-It to expression, changing it to:\n %s\n", + m_fixed_expression.c_str()); } if (expr_result == eExpressionCompleted) { diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index 82283d0fe6a0..72617f9a8bf7 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -440,10 +440,12 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); // Only mention Fix-Its if the command applies them. - // The compiler errors can address any parsing issues after applying Fix-It(s). + // The compiler errors can address any parsing issues after applying + // Fix-It(s). if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", - m_fixed_expression.c_str()); + error_stream.Printf( + " Applying Fix-It to expression, changing it to:\n %s\n", + m_fixed_expression.c_str()); } if (result_valobj_sp) { diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp index 3ead9e4957a6..e9cf11d18a65 100644 --- a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -1,5 +1,4 @@ -int main() -{ +int main() { long foo = 1234; return 0; // break here diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp index 3ead9e4957a6..e9cf11d18a65 100644 --- a/lldb/test/API/lang/cpp/expression-fixit/main.cpp +++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp @@ -1,5 +1,4 @@ -int main() -{ +int main() { long foo = 1234; return 0; // break here ``````````
https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:50:09 2023 From: lldb-commits at lists.llvm.org (Felipe de Azevedo Piovezan via lldb-commits) Date: Fri, 06 Oct 2023 14:50:09 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208111.170a0220.92e38.ecab@mx.google.com> felipepiovezan wrote: The code is not something I am familiar with, but I did notice that the PR title and the commit title are quite different. Not sure which one is better, but it is generally nice to have them match (in particular it is nice to add the `[lldb]` tag to the commit title to make `git log` nice to browse) https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 14:50:40 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 14:50:40 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208130.170a0220.a71df.ec04@mx.google.com> https://github.com/PortalPete updated https://github.com/llvm/llvm-project/pull/68452 >From 3a17d92ccaf71bd1f04035278af7b9999e5eb5c3 Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Thu, 5 Oct 2023 14:22:35 -1000 Subject: [PATCH] Modify `po` alias to match outward FixIt behavior with `expression`. - Fix `po` alias so that it prints out a message when applying a FixIt, just like the `expression` command. - Add test cases for applying a FixIt with both `expression` command and `po` alias. - Reword console messages for readability. --- .../Commands/CommandObjectDWIMPrint.cpp | 15 +++++++++-- lldb/source/Commands/CommandObjectDWIMPrint.h | 4 +++ .../Commands/CommandObjectExpression.cpp | 10 ++++--- .../source/Commands/CommandObjectExpression.h | 4 ++- .../API/lang/cpp/dwim-print-fixit/Makefile | 3 +++ .../dwim-print-fixit/TestCppDWIMPrintFixIt.py | 27 +++++++++++++++++++ .../API/lang/cpp/dwim-print-fixit/main.cpp | 5 ++++ .../API/lang/cpp/expression-fixit/Makefile | 3 +++ .../TestCppExpressionFixIt.py | 26 ++++++++++++++++++ .../API/lang/cpp/expression-fixit/main.cpp | 5 ++++ 10 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp create mode 100644 lldb/test/API/lang/cpp/expression-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py create mode 100644 lldb/test/API/lang/cpp/expression-fixit/main.cpp diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 7b168eab9e02d44..1f07cdf4c8a167b 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -172,8 +172,19 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). 
+ if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream.Printf( + " Applying Fix-It to expression, changing it to:\n %s\n", + m_fixed_expression.c_str()); + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.h b/lldb/source/Commands/CommandObjectDWIMPrint.h index 3fc6c01d4729707..b4e68495b17af07 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.h +++ b/lldb/source/Commands/CommandObjectDWIMPrint.h @@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { HandleArgumentCompletion(CompletionRequest &request, OptionElementVector &opt_element_vector) override; +protected: + /// Stores the expression after applying any Fix-Its. + std::string m_fixed_expression; + private: bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b99133..72617f9a8bf7a71 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,11 +439,13 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", - m_fixed_expression.c_str()); + error_stream.Printf( + " Applying Fix-It to expression, changing it to:\n %s\n", + m_fixed_expression.c_str()); } if (result_valobj_sp) { diff --git a/lldb/source/Commands/CommandObjectExpression.h b/lldb/source/Commands/CommandObjectExpression.h index b2b8fc73a1ee831..276da559fb4dbd6 100644 --- a/lldb/source/Commands/CommandObjectExpression.h +++ b/lldb/source/Commands/CommandObjectExpression.h @@ -99,7 +99,9 @@ class CommandObjectExpression : public CommandObjectRaw, CommandOptions m_command_options; uint32_t m_expr_line_count; std::string m_expr_lines; // Multi-line expression support - std::string m_fixed_expression; // Holds the current expression's fixed text. + + /// Stores the expression after applying any Fix-Its. 
+ std::string m_fixed_expression; }; } // namespace lldb_private diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py new file mode 100644 index 000000000000000..ad92d0216126965 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "po class C { int i; void f() { []() { ++i; }(); } }", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} diff --git a/lldb/test/API/lang/cpp/expression-fixit/Makefile b/lldb/test/API/lang/cpp/expression-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py new file mode 100644 index 000000000000000..7517d911723d851 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_Expression_FixIt_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." 
+ + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr -l c++ -- class C { int i; void f() { []() { ++i; }(); } }", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} From lldb-commits at lists.llvm.org Fri Oct 6 14:58:52 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 14:58:52 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520831c.170a0220.d69aa.e8aa@mx.google.com> ================ @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream.Printf(" Applying Fix-It to expression, changing it to:\n %s\n", ---------------- PortalPete wrote: This is the _only_ output. Technically the print comes after the change, but it's all happening "now", in the present tense, from the developers perspective. I can change it back to past tense, but I don't know that detail would be relevant to anyone other than someone working on LLDB itself. Even then, that person can just step through debugger and see when the change happened relative to the print statement. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:01:53 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:01:53 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <652083d1.170a0220.15045.ec21@mx.google.com> ================ @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). ---------------- PortalPete wrote: I did my best to reword/convert this comment from `CommandObjectExpression.cpp`: ``` // We only tell you about the FixIt if we applied it. The compiler errors // will suggest the FixIt if it parsed. 
``` https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:04:01 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:04:01 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208451.170a0220.ebe69.d402@mx.google.com> https://github.com/PortalPete edited https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:04:29 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Fri, 06 Oct 2023 15:04:29 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520846d.630a0220.287263.c548@mx.google.com> ================ @@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { HandleArgumentCompletion(CompletionRequest &request, OptionElementVector &opt_element_vector) override; +protected: + /// Stores the expression after applying any Fix-Its. + std::string m_fixed_expression; ---------------- kastiglione wrote: It appears this doesn't need to be a member variable, that it can be a local variable in `CommandObjectDWIMPrint::DoExecute`. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:05:54 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Fri, 06 Oct 2023 15:05:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <652084c2.a70a0220.2e816.6701@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): ---------------- kastiglione wrote: This should be named either `TestCase` by itself, or should match the name of the file. I prefer `TestCase`, but it's up to you. Additionally, class names typically don't use underscores in their name. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:06:58 2023 From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits) Date: Fri, 06 Oct 2023 15:06:58 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208502.620a0220.d3010.69f2@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." ---------------- kastiglione wrote: This can be a multiline string, instead of using `\` line continuations. ```suggestion """ Confirm that the `po` command (alias) applies a FixIt and prints it out to the console, just like the `expression` command. 
""" ``` https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:24:15 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:24:15 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520890f.a70a0220.27927.6620@mx.google.com> https://github.com/PortalPete updated https://github.com/llvm/llvm-project/pull/68452 >From 7def3f8e3a04ee9efbaef194bd60eec81fb32c25 Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Thu, 5 Oct 2023 14:22:35 -1000 Subject: [PATCH] Modify `po` alias to match outward FixIt behavior with `expression`. - Fix `po` alias so that it prints out a message when applying a FixIt, just like the `expression` command. - Add test cases for applying a FixIt with both `expression` command and `po` alias. - Reword console messages for readability. --- .../Commands/CommandObjectDWIMPrint.cpp | 14 ++++++++-- lldb/source/Commands/CommandObjectDWIMPrint.h | 4 +++ .../Commands/CommandObjectExpression.cpp | 9 ++++--- .../source/Commands/CommandObjectExpression.h | 4 ++- .../API/lang/cpp/dwim-print-fixit/Makefile | 3 +++ .../dwim-print-fixit/TestCppDWIMPrintFixIt.py | 27 +++++++++++++++++++ .../API/lang/cpp/dwim-print-fixit/main.cpp | 5 ++++ .../API/lang/cpp/expression-fixit/Makefile | 3 +++ .../TestCppExpressionFixIt.py | 26 ++++++++++++++++++ .../API/lang/cpp/expression-fixit/main.cpp | 5 ++++ 10 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp create mode 100644 lldb/test/API/lang/cpp/expression-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py create mode 100644 lldb/test/API/lang/cpp/expression-fixit/main.cpp diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 7b168eab9e02d44..c43cc45ddbc01f2 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -172,8 +172,18 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). 
+ if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.h b/lldb/source/Commands/CommandObjectDWIMPrint.h index 3fc6c01d4729707..b4e68495b17af07 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.h +++ b/lldb/source/Commands/CommandObjectDWIMPrint.h @@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { HandleArgumentCompletion(CompletionRequest &request, OptionElementVector &opt_element_vector) override; +protected: + /// Stores the expression after applying any Fix-Its. + std::string m_fixed_expression; + private: bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b99133..0c7cebe6a020d87 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,11 +439,12 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", - m_fixed_expression.c_str()); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; } if (result_valobj_sp) { diff --git a/lldb/source/Commands/CommandObjectExpression.h b/lldb/source/Commands/CommandObjectExpression.h index b2b8fc73a1ee831..276da559fb4dbd6 100644 --- a/lldb/source/Commands/CommandObjectExpression.h +++ b/lldb/source/Commands/CommandObjectExpression.h @@ -99,7 +99,9 @@ class CommandObjectExpression : public CommandObjectRaw, CommandOptions m_command_options; uint32_t m_expr_line_count; std::string m_expr_lines; // Multi-line expression support - std::string m_fixed_expression; // Holds the current expression's fixed text. + + /// Stores the expression after applying any Fix-Its. 
+ std::string m_fixed_expression; }; } // namespace lldb_private diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py new file mode 100644 index 000000000000000..ffd348e16a7e01c --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "dwim-print -O -- class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} diff --git a/lldb/test/API/lang/cpp/expression-fixit/Makefile b/lldb/test/API/lang/cpp/expression-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py new file mode 100644 index 000000000000000..6559860f0ddf17d --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_Expression_FixIt_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." 
+
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.cpp")
+        )
+
+        self.expect(
+            "expr class C { int i; void f() { []() { ++i; }(); } }; 1",
+            error = True,
+            substrs=["Applying Fix-It to expression",
+                "changing it to:",
+                "class C { int i; void f() { [this]() { ++i; }(); } }"],
+        )
diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp
new file mode 100644
index 000000000000000..e9cf11d18a6560d
--- /dev/null
+++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp
@@ -0,0 +1,5 @@
+int main() {
+  long foo = 1234;
+
+  return 0; // break here
+}

From lldb-commits at lists.llvm.org  Fri Oct  6 15:24:34 2023
From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits)
Date: Fri, 06 Oct 2023 15:24:34 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <65208922.050a0220.1b278.6514@mx.google.com>

================
@@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command,
   auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope();
   ValueObjectSP valobj_sp;
   ExpressionResults expr_result =
-      target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options);
+      target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression);
+
+  // Only mention Fix-Its if the command applies them.
+  // The compiler errors can address any parsing issues after applying Fix-It(s).
+  if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) {
+    Stream &error_stream = result.GetErrorStream();
+    error_stream.Printf("  Applying Fix-It to expression, changing it to:\n    %s\n",
+                        m_fixed_expression.c_str());
----------------
PortalPete wrote:

Nice! That actually makes the code cleaner too.

```cpp
    if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) {
      error_stream << "  Applying Fix-It to expression, changing it to:\n";
      error_stream << "    " << m_fixed_expression << "\n";
    }
```

https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 15:26:07 2023
From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits)
Date: Fri, 06 Oct 2023 15:26:07 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <6520897f.620a0220.c9ef3.67ff@mx.google.com>

================
@@ -0,0 +1,27 @@
+"""
+Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does
+"""
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class CPP_DWIM_Fixit_TestCase(TestBase):
+    def test_with_run_command(self):
+        "Confirm that the `po` command (alias) applies a FixIt " \
+        "and prints it out to the console, " \
+        "just like the `expression` command."
+
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.cpp")
+        )
+
+        self.expect(
+            "po class C { int i; void f() { []() { ++i; }(); } }",
+            error = True,
+            substrs=["Applying Fix-It to expression",
+                "changing it to:",
+                "class C { int i; void f() { [this]() { ++i; }(); } }"],
----------------
PortalPete wrote:

I tried it but the unit test still thinks there's an error, so I have to leave `error = True` in there until we figure that one out.
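To make the control flow under review easier to see outside of lldb, here is a minimal, self-contained sketch of the Fix-It notification pattern (a hedged illustration: `evaluate` and its behavior are made up; only the out-parameter shape and the empty-string check mirror the patch):

```cpp
#include <iostream>
#include <string>

// Stand-in for the evaluator: if a Fix-It was applied, it reports the
// rewritten expression through the out-parameter (mirroring the new
// `&m_fixed_expression` argument); otherwise the string stays empty.
static bool evaluate(const std::string &expr, std::string *fixed_expression) {
  // Pretend the compiler applied the lambda-capture Fix-It.
  if (expr.find("[]() { ++i; }") != std::string::npos && fixed_expression)
    *fixed_expression = "class C { int i; void f() { [this]() { ++i; }(); } }";
  return true;
}

int main() {
  std::string fixed_expression; // stays empty when no Fix-It was applied
  evaluate("class C { int i; void f() { []() { ++i; }(); } }",
           &fixed_expression);
  // Only notify when a Fix-It actually fired, as in the patch.
  if (!fixed_expression.empty())
    std::cout << "  Applying Fix-It to expression, changing it to:\n    "
              << fixed_expression << "\n";
}
```

Run standalone, this prints the same two-line notification that the tests above match with `substrs`.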
https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:27:04 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:27:04 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <652089b8.170a0220.ec5b9.3c36@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "po class C { int i; void f() { []() { ++i; }(); } }", ---------------- PortalPete wrote: Done! https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:29:17 2023 From: lldb-commits at lists.llvm.org (Felipe de Azevedo Piovezan via lldb-commits) Date: Fri, 06 Oct 2023 15:29:17 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208a3d.170a0220.a190c.ee37@mx.google.com> ================ @@ -172,8 +172,18 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; ---------------- felipepiovezan wrote: Doesn't LLDB have some abstractions to handle indentation? I thought I had seen something like this before, maybe someone else can chime in here https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:30:09 2023 From: lldb-commits at lists.llvm.org (Felipe de Azevedo Piovezan via lldb-commits) Date: Fri, 06 Oct 2023 15:30:09 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208a71.170a0220.2faf1.cf6a@mx.google.com> ================ @@ -172,8 +172,18 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). 
+ if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; ---------------- felipepiovezan wrote: Oh I see other places in the old code do the same... https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:30:36 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:30:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208a8c.170a0220.68acd.ed3a@mx.google.com> ================ @@ -172,8 +172,18 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; ---------------- PortalPete wrote: Yeah, I wouldn't know about that yet, so someone else would need to chime in. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:35:31 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:35:31 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208bb3.170a0220.cc235.2fb9@mx.google.com> https://github.com/PortalPete edited https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 15:35:31 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 15:35:31 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65208bb3.630a0220.ef86f.2fd5@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." ---------------- PortalPete wrote: I tried using a multiline string, but that adds hard returns and all the indentation spaces in the middle of the string, which means you get this on the output console … ``` Confirm that the `po` command (alias) applies a FixIt and prints it out to the console, just like the `expression` command. ``` Instead of this… ``` Confirm that the `po` command (alias) applies a FixIt and prints it out to the console, just like the `expression` command. 
```

Plus, I noticed that running the single unit test only prints the string up until the first `\n` to the console, which means it's truncating everything after it.

When I use line `\` continuations, it shows the whole string without truncating it, which was the only way I could figure out how to get it to work without making the source line too long (in Python).

https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 15:36:02 2023
From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits)
Date: Fri, 06 Oct 2023 15:36:02 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <65208bd2.170a0220.ef2d9.ec5f@mx.google.com>

https://github.com/PortalPete edited https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 15:38:40 2023
From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits)
Date: Fri, 06 Oct 2023 15:38:40 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <65208c70.170a0220.dc059.f3af@mx.google.com>

https://github.com/PortalPete edited https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 15:39:18 2023
From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits)
Date: Fri, 06 Oct 2023 15:39:18 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <65208c96.a70a0220.f1521.665c@mx.google.com>

https://github.com/PortalPete edited https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 15:41:47 2023
From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits)
Date: Fri, 06 Oct 2023 15:41:47 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <65208d2b.a70a0220.141a0.67d0@mx.google.com>

================
@@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw {
   HandleArgumentCompletion(CompletionRequest &request,
                            OptionElementVector &opt_element_vector) override;
 
+protected:
+  /// Stores the expression after applying any Fix-Its.
+  std::string m_fixed_expression;
----------------
PortalPete wrote:

Sure thing. I was wondering whether it would be better to use a local variable or to stay consistent with the `CommandObjectExpression` class.

https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 16:07:52 2023
From: lldb-commits at lists.llvm.org (Artem Belevich via lldb-commits)
Date: Fri, 06 Oct 2023 16:07:52 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [NVPTX] Improve lowering of v4i8 (PR #67866)
In-Reply-To: 
Message-ID: <65209348.a70a0220.57b79.65c8@mx.google.com>

Artem-B wrote:

Found another issue. We merge four independent byte loads with `align 1` into a 32-bit load, which fails at runtime on misaligned pointers.
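(The IR and PTX reproducer follows below.) As a hedged host-side analogy of the same hazard: in portable C++ a 4-byte load through a pointer with only 1-byte alignment is undefined behavior, and `memcpy` is the standard way to express it safely; a merged `ld.u32` has no such escape hatch when the address is not 4-byte aligned. This sketch is illustrative only and assumes nothing about the PR itself:

```cpp
#include <cstdint>
#include <cstring>

// The safe, portable way to read 4 bytes at an arbitrary offset: memcpy
// lowers to byte loads (or an unaligned load) as the target allows. A
// reinterpret_cast<const uint32_t*> load here would carry the same
// misalignment UB as the merged ld.u32 in the PTX below.
uint32_t load4(const unsigned char *p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof v);
  return v;
}

int main() {
  alignas(4) unsigned char storage[17] = {};
  return static_cast<int>(load4(storage + 9)); // offset 9: not 4-byte aligned
}
```
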
```
%t0 = type { [17 x i8] }
@shared_storage = linkonce_odr local_unnamed_addr addrspace(3) global %t0 undef, align 1

define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y) nounwind {
  %v = load <4 x i8>, ptr getelementptr inbounds (i8, ptr addrspacecast (ptr addrspace(3) @shared_storage to ptr), i64 9), align 1
  ret <4 x i8> %v
}
```

```
	mov.u64 	%rd1, shared_storage;
	cvta.shared.u64 	%rd2, %rd1;
	ld.u32 	%r1, [%rd2+9];
	st.param.b32 	[func_retval0+0], %r1;
	ret;
```

https://github.com/llvm/llvm-project/pull/67866

From lldb-commits at lists.llvm.org  Fri Oct  6 16:20:58 2023
From: lldb-commits at lists.llvm.org (Dave Lee via lldb-commits)
Date: Fri, 06 Oct 2023 16:20:58 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to
 the console. (PR #68452)
In-Reply-To: 
Message-ID: <6520965a.170a0220.3558a.ee12@mx.google.com>

================
@@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw {
   HandleArgumentCompletion(CompletionRequest &request,
                            OptionElementVector &opt_element_vector) override;
 
+protected:
+  /// Stores the expression after applying any Fix-Its.
+  std::string m_fixed_expression;
----------------
kastiglione wrote:

Yes, I noticed that expression needs it to be a member variable. Personally I'd prefer to see this class be slightly different (it is already, anyway), rather than copy what expression does. It's good to keep them consistent, but it's better, imo, to avoid introducing bits of unnecessary complexity.

https://github.com/llvm/llvm-project/pull/68452

From lldb-commits at lists.llvm.org  Fri Oct  6 16:22:15 2023
From: lldb-commits at lists.llvm.org (Artem Belevich via lldb-commits)
Date: Fri, 06 Oct 2023 16:22:15 -0700 (PDT)
Subject: [Lldb-commits] [lldb] [NVPTX] Improve lowering of v4i8 (PR #67866)
In-Reply-To: 
Message-ID: <652096a7.170a0220.2c4cc.f708@mx.google.com>

https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/67866

>From 4771c973c4659b814eacbacc23bd3c6c877ce2da Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Fri, 29 Sep 2023 14:37:46 -0700
Subject: [PATCH 1/9] [NVPTX] Improve lowering of v4i8

Make it a legal type and plumb through lowering of relevant
instructions.
--- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 20 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 20 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 52 +- llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td | 2 +- .../NVPTX/load-with-non-coherent-cache.ll | 4 +- llvm/test/CodeGen/NVPTX/param-load-store.ll | 26 +- ...unfold-masked-merge-vector-variablemask.ll | 518 ++++-------------- llvm/test/CodeGen/NVPTX/vec8.ll | 5 +- 8 files changed, 177 insertions(+), 470 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0aef2591c6e2394..1daa4971981c25c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -829,6 +829,7 @@ pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, case MVT::v2f16: case MVT::v2bf16: case MVT::v2i16: + case MVT::v4i8: return Opcode_i32; case MVT::f32: return Opcode_f32; @@ -910,7 +911,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { // Vector Setting unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; if (SimpleVT.isVector()) { - assert(Isv2x16VT(LoadedVT) && "Unexpected vector type"); + assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) && + "Unexpected vector type"); // v2f16/v2bf16/v2i16 is loaded using ld.b32 fromTypeWidth = 32; } @@ -1254,6 +1256,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { SDLoc DL(N); SDNode *LD; SDValue Base, Offset, Addr; + EVT OrigType = N->getValueType(0); EVT EltVT = Mem->getMemoryVT(); unsigned NumElts = 1; @@ -1261,12 +1264,15 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NumElts = EltVT.getVectorNumElements(); EltVT = EltVT.getVectorElementType(); // vectors of 16bits type are loaded/stored as multiples of v2x16 elements. - if ((EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) || - (EltVT == MVT::bf16 && N->getValueType(0) == MVT::v2bf16) || - (EltVT == MVT::i16 && N->getValueType(0) == MVT::v2i16)) { + if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) || + (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) || + (EltVT == MVT::i16 && OrigType == MVT::v2i16)) { assert(NumElts % 2 == 0 && "Vector must have even number of elements"); - EltVT = N->getValueType(0); + EltVT = OrigType; NumElts /= 2; + } else if (OrigType == MVT::v4i8) { + EltVT = OrigType; + NumElts = 1; } } @@ -1601,7 +1607,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { // concept of sign-/zero-extension, so emulate it here by adding an explicit // CVT instruction. Ptxas should clean up any redundancies here. 
- EVT OrigType = N->getValueType(0); LoadSDNode *LdNode = dyn_cast(N); if (OrigType != EltVT && @@ -1679,7 +1684,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { MVT ScalarVT = SimpleVT.getScalarType(); unsigned toTypeWidth = ScalarVT.getSizeInBits(); if (SimpleVT.isVector()) { - assert(Isv2x16VT(StoreVT) && "Unexpected vector type"); + assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) && + "Unexpected vector type"); // v2x16 is stored using st.b32 toTypeWidth = 32; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b24aae4792ce6a6..7880d70fb2c6fea 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -221,6 +221,11 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, llvm_unreachable("Unexpected type"); } NumElts /= 2; + } else if (EltVT.getSimpleVT() == MVT::i8 && + (NumElts % 4 == 0 || NumElts == 3)) { + // v*i8 are formally lowered as v4i8 + EltVT = MVT::v4i8; + NumElts = (NumElts + 3) / 4; } for (unsigned j = 0; j != NumElts; ++j) { ValueVTs.push_back(EltVT); @@ -458,6 +463,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass); addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); @@ -2631,7 +2637,7 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return expandUnalignedStore(Store, DAG); // v2f16, v2bf16 and v2i16 don't need special handling. - if (Isv2x16VT(VT)) + if (Isv2x16VT(VT) || VT == MVT::v4i8) return SDValue(); if (VT.isVector()) @@ -2903,7 +2909,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( EVT LoadVT = EltVT; if (EltVT == MVT::i1) LoadVT = MVT::i8; - else if (Isv2x16VT(EltVT)) + else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8) // getLoad needs a vector type, but it can't handle // vectors which contain v2f16 or v2bf16 elements. So we must load // using i32 here and then bitcast back. @@ -2929,7 +2935,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (EltVT == MVT::i1) Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); // v2f16 was loaded as an i32. Now we must bitcast it back. - else if (Isv2x16VT(EltVT)) + else if (EltVT != LoadVT) Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt); // If a promoted integer type is used, truncate down to the original @@ -5256,9 +5262,9 @@ static SDValue PerformEXTRACTCombine(SDNode *N, SDValue Vector = N->getOperand(0); EVT VectorVT = Vector.getValueType(); if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && - IsPTXVectorType(VectorVT.getSimpleVT())) + IsPTXVectorType(VectorVT.getSimpleVT()) && VectorVT != MVT::v4i8) return SDValue(); // Native vector loads already combine nicely w/ - // extract_vector_elt. + // extract_vector_elt, except for v4i8. // Don't mess with singletons or v2*16 types, we already handle them OK. if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT)) return SDValue(); @@ -5289,6 +5295,10 @@ static SDValue PerformEXTRACTCombine(SDNode *N, // If element has non-integer type, bitcast it back to the expected type. 
if (EltVT != EltIVT) Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result); + // Past legalizer, we may need to extent i8 -> i16 to match the register type. + if (EltVT != N->getValueType(0)) + Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result); + return Result; } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 28c4cadb303ad4f..047161fb2027dee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1486,23 +1486,24 @@ defm OR : BITWISE<"or", or>; defm AND : BITWISE<"and", and>; defm XOR : BITWISE<"xor", xor>; -// Lower logical v2i16 ops as bitwise ops on b32. -def: Pat<(or (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)), - (ORb32rr Int32Regs:$a, Int32Regs:$b)>; -def: Pat<(xor (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)), - (XORb32rr Int32Regs:$a, Int32Regs:$b)>; -def: Pat<(and (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)), - (ANDb32rr Int32Regs:$a, Int32Regs:$b)>; - -// The constants get legalized into a bitcast from i32, so that's what we need -// to match here. -def: Pat<(or Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))), - (ORb32ri Int32Regs:$a, imm:$b)>; -def: Pat<(xor Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))), - (XORb32ri Int32Regs:$a, imm:$b)>; -def: Pat<(and Int32Regs:$a, (v2i16 (bitconvert (i32 imm:$b)))), - (ANDb32ri Int32Regs:$a, imm:$b)>; - +// Lower logical v2i16/v4i8 ops as bitwise ops on b32. +foreach vt = [v2i16, v4i8] in { + def: Pat<(or (vt Int32Regs:$a), (vt Int32Regs:$b)), + (ORb32rr Int32Regs:$a, Int32Regs:$b)>; + def: Pat<(xor (vt Int32Regs:$a), (vt Int32Regs:$b)), + (XORb32rr Int32Regs:$a, Int32Regs:$b)>; + def: Pat<(and (vt Int32Regs:$a), (vt Int32Regs:$b)), + (ANDb32rr Int32Regs:$a, Int32Regs:$b)>; + + // The constants get legalized into a bitcast from i32, so that's what we need + // to match here. 
+ def: Pat<(or Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + (ORb32ri Int32Regs:$a, imm:$b)>; + def: Pat<(xor Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + (XORb32ri Int32Regs:$a, imm:$b)>; + def: Pat<(and Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + (ANDb32ri Int32Regs:$a, imm:$b)>; +} def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), "not.pred \t$dst, $src;", @@ -2682,7 +2683,7 @@ foreach vt = [f16, bf16] in { def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>; } -foreach vt = [v2f16, v2bf16, v2i16] in { +foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>; } @@ -2995,8 +2996,8 @@ def: Pat<(i16 (bitconvert (vt Int16Regs:$a))), (ProxyRegI16 Int16Regs:$a)>; } -foreach ta = [v2f16, v2bf16, v2i16, i32] in { - foreach tb = [v2f16, v2bf16, v2i16, i32] in { +foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in { + foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in { if !ne(ta, tb) then { def: Pat<(ta (bitconvert (tb Int32Regs:$a))), (ProxyRegI32 Int32Regs:$a)>; @@ -3292,6 +3293,10 @@ let hasSideEffects = false in { (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4), "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; + def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b32 \t$d, {{$s1, $s2, $s3, $s4}};", []>; def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2), "mov.b32 \t$d, {{$s1, $s2}};", []>; @@ -3307,6 +3312,10 @@ let hasSideEffects = false in { Int16Regs:$d3, Int16Regs:$d4), (ins Int64Regs:$s), "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; + def I32toV4I8 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, + Int16Regs:$d3, Int16Regs:$d4), + (ins Int32Regs:$s), + "mov.b32 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), (ins Int32Regs:$s), "mov.b32 \t{{$d1, $d2}}, $s;", []>; @@ -3354,6 +3363,9 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; +def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), + (i16 Int16Regs:$c), (i16 Int16Regs:$d))), + (V4I8toI32 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c, Int16Regs:$d)>; // Count leading zeros let hasSideEffects = false in { diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td index ed9dabf39dd7ad9..b5231a9cf67f93a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -58,7 +58,7 @@ foreach i = 0...31 in { //===----------------------------------------------------------------------===// def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; -def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16], 32, +def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll index 9012339fb6b1e20..98ab93774588d28 100644 --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ 
b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -130,9 +130,9 @@ define void @foo12(ptr noalias readonly %from, ptr %to) { } ; SM20-LABEL: .visible .entry foo13( -; SM20: ld.global.v4.u8 +; SM20: ld.global.u32 ; SM35-LABEL: .visible .entry foo13( -; SM35: ld.global.nc.v4.u8 +; SM35: ld.global.nc.u32 define void @foo13(ptr noalias readonly %from, ptr %to) { %1 = load <4 x i8>, ptr %from store <4 x i8> %1, ptr %to diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 2d87271e30ae0b7..b4208c691c91dfa 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -212,18 +212,16 @@ define signext i8 @test_i8s(i8 signext %a) { ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) ; CHECK-LABEL: test_v3i8( ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4] -; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2]; -; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0]; +; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v3i8_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]}; -; CHECK: st.param.b8 [param0+2], [[E2]]; +; CHECK: st.param.b32 [param0+0], [[R]] ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i8, -; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; -; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]}; -; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]]; +; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0+0]; +; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very +; interesting here, so it's skipped. 
+; CHECK: st.param.b32 [func_retval0+0], ; CHECK-NEXT: ret; define <3 x i8> @test_v3i8(<3 x i8> %a) { %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a); @@ -233,14 +231,14 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) { ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) ; CHECK-LABEL: test_v4i8( ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4] -; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0] +; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0] ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK: st.param.b32 [param0+0], [[R]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v4i8, -; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; -; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0+0]; +; CHECK: st.param.b32 [func_retval0+0], [[RET]]; ; CHECK-NEXT: ret; define <4 x i8> @test_v4i8(<4 x i8> %a) { %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a); @@ -250,10 +248,10 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) { ; CHECK: .func (.param .align 8 .b8 func_retval0[8]) ; CHECK-LABEL: test_v5i8( ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8] +; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_v5i8_param_0] ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4]; -; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0] ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK-DAG: st.param.v4.b8 [param0+0], ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll index 16579de882ed4b6..74087be4834d966 100644 --- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll @@ -60,46 +60,20 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot2[4]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b32 %r<21>; +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<11>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot2; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v4i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v4i8_param_2]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: and.b32 %r4, %r2, %r3; -; CHECK-NEXT: mov.b32 %r5, {%rs7, %rs8}; -; CHECK-NEXT: and.b32 %r6, %r1, %r5; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v4i8_param_1]; -; CHECK-NEXT: mov.b32 %r7, {%rs11, %rs12}; -; CHECK-NEXT: mov.b32 %r8, {%rs9, %rs10}; -; CHECK-NEXT: xor.b32 %r9, %r5, 16711935; -; CHECK-NEXT: xor.b32 %r10, %r3, 16711935; -; CHECK-NEXT: and.b32 %r11, %r8, %r10; -; CHECK-NEXT: and.b32 %r12, 
%r7, %r9; -; CHECK-NEXT: or.b32 %r13, %r6, %r12; -; CHECK-NEXT: mov.b32 {%rs13, %rs14}, %r13; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs13, %rs14}; -; CHECK-NEXT: or.b32 %r14, %r4, %r11; -; CHECK-NEXT: mov.b32 {%rs15, %rs16}, %r14; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs15, %rs16}; -; CHECK-NEXT: ld.u16 %r15, [%SP+0]; -; CHECK-NEXT: shl.b32 %r16, %r15, 16; -; CHECK-NEXT: ld.u16 %r17, [%SP+2]; -; CHECK-NEXT: or.b32 %r18, %r17, %r16; -; CHECK-NEXT: shr.u32 %r19, %r18, 8; -; CHECK-NEXT: cvt.u16.u32 %rs17, %r19; -; CHECK-NEXT: cvt.u16.u32 %rs18, %r15; -; CHECK-NEXT: bfe.s32 %r20, %r15, 8, 8; -; CHECK-NEXT: cvt.u16.u32 %rs19, %r20; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs15, %rs17, %rs18, %rs19}; +; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_param_2]; +; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_param_0]; +; CHECK-NEXT: and.b32 %r5, %r4, %r1; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: xor.b32 %r8, %r1, %r7; +; CHECK-NEXT: and.b32 %r9, %r3, %r8; +; CHECK-NEXT: or.b32 %r10, %r5, %r9; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -111,48 +85,20 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8_undef( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot3[4]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<22>; -; CHECK-NEXT: .reg .b32 %r<22>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<11>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot3; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v4i8_undef_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v4i8_undef_param_2]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: and.b32 %r4, %r2, %r3; -; CHECK-NEXT: mov.b32 %r5, {%rs7, %rs8}; -; CHECK-NEXT: and.b32 %r6, %r1, %r5; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v4i8_undef_param_1]; -; CHECK-NEXT: mov.b32 %r7, {%rs11, %rs12}; -; CHECK-NEXT: mov.b32 %r8, {%rs9, %rs10}; -; CHECK-NEXT: mov.u16 %rs13, 255; -; CHECK-NEXT: mov.b32 %r9, {%rs14, %rs13}; -; CHECK-NEXT: xor.b32 %r10, %r5, %r9; -; CHECK-NEXT: xor.b32 %r11, %r3, 16711935; -; CHECK-NEXT: and.b32 %r12, %r8, %r11; -; CHECK-NEXT: and.b32 %r13, %r7, %r10; -; CHECK-NEXT: or.b32 %r14, %r6, %r13; -; CHECK-NEXT: mov.b32 {%rs15, %rs16}, %r14; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs15, %rs16}; -; CHECK-NEXT: or.b32 %r15, %r4, %r12; -; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs17, %rs18}; -; CHECK-NEXT: ld.u16 %r16, [%SP+0]; -; CHECK-NEXT: shl.b32 %r17, %r16, 16; -; CHECK-NEXT: ld.u16 %r18, [%SP+2]; -; CHECK-NEXT: or.b32 %r19, %r18, %r17; -; CHECK-NEXT: shr.u32 %r20, %r19, 8; -; CHECK-NEXT: cvt.u16.u32 %rs19, %r20; -; CHECK-NEXT: cvt.u16.u32 %rs20, %r16; -; CHECK-NEXT: bfe.s32 %r21, %r16, 8, 8; -; CHECK-NEXT: cvt.u16.u32 %rs21, %r21; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs17, %rs19, %rs20, %rs21}; +; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_undef_param_2]; +; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_undef_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_undef_param_0]; +; CHECK-NEXT: and.b32 
%r5, %r4, %r1; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs2, %rs1}; +; CHECK-NEXT: xor.b32 %r8, %r1, %r7; +; CHECK-NEXT: and.b32 %r9, %r3, %r8; +; CHECK-NEXT: or.b32 %r10, %r5, %r9; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -212,84 +158,24 @@ define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwin define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: out_v8i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot6[8]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<40>; -; CHECK-NEXT: .reg .b32 %r<38>; -; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<22>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot6; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v8i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v8i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r3, {%rs7, %rs8}; -; CHECK-NEXT: mov.b32 %r4, {%rs5, %rs6}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v8i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r5, {%rs9, %rs10}; -; CHECK-NEXT: and.b32 %r6, %r4, %r5; -; CHECK-NEXT: mov.b32 %r7, {%rs11, %rs12}; -; CHECK-NEXT: and.b32 %r8, %r3, %r7; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [out_v8i8_param_2]; -; CHECK-NEXT: mov.b32 %r9, {%rs13, %rs14}; -; CHECK-NEXT: and.b32 %r10, %r2, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs15, %rs16}; -; CHECK-NEXT: and.b32 %r12, %r1, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [out_v8i8_param_1]; -; CHECK-NEXT: mov.b32 %r13, {%rs19, %rs20}; -; CHECK-NEXT: mov.b32 %r14, {%rs17, %rs18}; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [out_v8i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r15, {%rs23, %rs24}; -; CHECK-NEXT: mov.b32 %r16, {%rs21, %rs22}; -; CHECK-NEXT: xor.b32 %r17, %r11, 16711935; -; CHECK-NEXT: xor.b32 %r18, %r9, 16711935; -; CHECK-NEXT: xor.b32 %r19, %r7, 16711935; -; CHECK-NEXT: xor.b32 %r20, %r5, 16711935; -; CHECK-NEXT: and.b32 %r21, %r16, %r20; -; CHECK-NEXT: and.b32 %r22, %r15, %r19; -; CHECK-NEXT: and.b32 %r23, %r14, %r18; -; CHECK-NEXT: and.b32 %r24, %r13, %r17; -; CHECK-NEXT: or.b32 %r25, %r12, %r24; -; CHECK-NEXT: mov.b32 {%rs25, %rs26}, %r25; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs25, %rs26}; -; CHECK-NEXT: or.b32 %r26, %r10, %r23; -; CHECK-NEXT: mov.b32 {%rs27, %rs28}, %r26; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs27, %rs28}; -; CHECK-NEXT: or.b32 %r27, %r8, %r22; -; CHECK-NEXT: mov.b32 {%rs29, %rs30}, %r27; -; CHECK-NEXT: st.v2.u8 [%SP+4], {%rs29, %rs30}; -; CHECK-NEXT: or.b32 %r28, %r6, %r21; -; CHECK-NEXT: mov.b32 {%rs31, %rs32}, %r28; -; CHECK-NEXT: st.v2.u8 [%SP+6], {%rs31, %rs32}; -; CHECK-NEXT: ld.u16 %r29, [%SP+0]; -; CHECK-NEXT: shl.b32 %r30, %r29, 16; -; CHECK-NEXT: ld.u16 %r31, [%SP+2]; -; CHECK-NEXT: or.b32 %r32, %r31, %r30; -; CHECK-NEXT: cvt.u64.u32 %rd1, %r32; -; CHECK-NEXT: ld.u16 %r33, [%SP+4]; -; CHECK-NEXT: shl.b32 %r34, %r33, 16; -; CHECK-NEXT: ld.u16 %r35, [%SP+6]; -; CHECK-NEXT: or.b32 %r36, %r35, %r34; -; CHECK-NEXT: cvt.u64.u32 %rd2, %r36; -; CHECK-NEXT: shl.b64 %rd3, %rd2, 32; -; CHECK-NEXT: or.b64 %rd4, %rd1, %rd3; -; CHECK-NEXT: shr.u32 %r37, %r36, 8; -; CHECK-NEXT: shr.u64 %rd5, %rd4, 24; -; 
CHECK-NEXT: cvt.u16.u64 %rs33, %rd5; -; CHECK-NEXT: shr.u64 %rd6, %rd1, 16; -; CHECK-NEXT: cvt.u16.u64 %rs34, %rd6; -; CHECK-NEXT: shr.u64 %rd7, %rd1, 8; -; CHECK-NEXT: cvt.u16.u64 %rs35, %rd7; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs27, %rs35, %rs34, %rs33}; -; CHECK-NEXT: cvt.u16.u32 %rs36, %r37; -; CHECK-NEXT: bfe.s64 %rd8, %rd2, 24, 8; -; CHECK-NEXT: cvt.u16.u64 %rs37, %rd8; -; CHECK-NEXT: cvt.u16.u32 %rs38, %r33; -; CHECK-NEXT: cvt.u16.u32 %rs39, %r35; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs39, %rs36, %rs38, %rs37}; +; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [out_v8i8_param_1]; +; CHECK-NEXT: ld.param.v2.u32 {%r5, %r6}, [out_v8i8_param_2]; +; CHECK-NEXT: ld.param.v2.u32 {%r9, %r10}, [out_v8i8_param_0]; +; CHECK-NEXT: and.b32 %r11, %r9, %r5; +; CHECK-NEXT: and.b32 %r13, %r10, %r6; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r15, {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: xor.b32 %r16, %r6, %r15; +; CHECK-NEXT: xor.b32 %r17, %r5, %r15; +; CHECK-NEXT: and.b32 %r18, %r1, %r17; +; CHECK-NEXT: and.b32 %r19, %r2, %r16; +; CHECK-NEXT: or.b32 %r20, %r13, %r19; +; CHECK-NEXT: or.b32 %r21, %r11, %r18; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r21, %r20}; ; CHECK-NEXT: ret; %mx = and <8 x i8> %x, %mask %notmask = xor <8 x i8> %mask, @@ -408,90 +294,32 @@ define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwin define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: out_v16i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<65>; -; CHECK-NEXT: .reg .b32 %r<57>; +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<42>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [out_v16i8_param_0+12]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [out_v16i8_param_0+8]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: mov.b32 %r4, {%rs7, %rs8}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [out_v16i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r5, {%rs9, %rs10}; -; CHECK-NEXT: mov.b32 %r6, {%rs11, %rs12}; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [out_v16i8_param_0]; -; CHECK-NEXT: mov.b32 %r7, {%rs13, %rs14}; -; CHECK-NEXT: mov.b32 %r8, {%rs15, %rs16}; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [out_v16i8_param_2]; -; CHECK-NEXT: mov.b32 %r9, {%rs19, %rs20}; -; CHECK-NEXT: and.b32 %r10, %r8, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs17, %rs18}; -; CHECK-NEXT: and.b32 %r12, %r7, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [out_v16i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r13, {%rs23, %rs24}; -; CHECK-NEXT: and.b32 %r14, %r6, %r13; -; CHECK-NEXT: mov.b32 %r15, {%rs21, %rs22}; -; CHECK-NEXT: and.b32 %r16, %r5, %r15; -; CHECK-NEXT: ld.param.v4.u8 {%rs25, %rs26, %rs27, %rs28}, [out_v16i8_param_2+8]; -; CHECK-NEXT: mov.b32 %r17, {%rs27, %rs28}; -; CHECK-NEXT: and.b32 %r18, %r4, %r17; -; CHECK-NEXT: mov.b32 %r19, {%rs25, %rs26}; -; CHECK-NEXT: and.b32 %r20, %r3, %r19; -; CHECK-NEXT: ld.param.v4.u8 {%rs29, %rs30, %rs31, %rs32}, [out_v16i8_param_2+12]; -; CHECK-NEXT: mov.b32 %r21, {%rs31, %rs32}; -; CHECK-NEXT: and.b32 %r22, %r2, %r21; -; CHECK-NEXT: mov.b32 %r23, {%rs29, %rs30}; -; CHECK-NEXT: and.b32 %r24, %r1, %r23; -; CHECK-NEXT: ld.param.v4.u8 {%rs33, %rs34, %rs35, %rs36}, [out_v16i8_param_1+12]; -; CHECK-NEXT: mov.b32 %r25, {%rs33, %rs34}; -; CHECK-NEXT: mov.b32 %r26, {%rs35, %rs36}; -; CHECK-NEXT: 
ld.param.v4.u8 {%rs37, %rs38, %rs39, %rs40}, [out_v16i8_param_1+8]; -; CHECK-NEXT: mov.b32 %r27, {%rs37, %rs38}; -; CHECK-NEXT: mov.b32 %r28, {%rs39, %rs40}; -; CHECK-NEXT: ld.param.v4.u8 {%rs41, %rs42, %rs43, %rs44}, [out_v16i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r29, {%rs41, %rs42}; -; CHECK-NEXT: mov.b32 %r30, {%rs43, %rs44}; -; CHECK-NEXT: ld.param.v4.u8 {%rs45, %rs46, %rs47, %rs48}, [out_v16i8_param_1]; -; CHECK-NEXT: mov.b32 %r31, {%rs45, %rs46}; -; CHECK-NEXT: mov.b32 %r32, {%rs47, %rs48}; -; CHECK-NEXT: xor.b32 %r33, %r23, 16711935; -; CHECK-NEXT: xor.b32 %r34, %r21, 16711935; -; CHECK-NEXT: xor.b32 %r35, %r19, 16711935; -; CHECK-NEXT: xor.b32 %r36, %r17, 16711935; -; CHECK-NEXT: xor.b32 %r37, %r15, 16711935; -; CHECK-NEXT: xor.b32 %r38, %r13, 16711935; -; CHECK-NEXT: xor.b32 %r39, %r11, 16711935; -; CHECK-NEXT: xor.b32 %r40, %r9, 16711935; -; CHECK-NEXT: and.b32 %r41, %r32, %r40; -; CHECK-NEXT: and.b32 %r42, %r31, %r39; -; CHECK-NEXT: and.b32 %r43, %r30, %r38; -; CHECK-NEXT: and.b32 %r44, %r29, %r37; -; CHECK-NEXT: and.b32 %r45, %r28, %r36; -; CHECK-NEXT: and.b32 %r46, %r27, %r35; -; CHECK-NEXT: and.b32 %r47, %r26, %r34; -; CHECK-NEXT: and.b32 %r48, %r25, %r33; -; CHECK-NEXT: or.b32 %r49, %r24, %r48; -; CHECK-NEXT: or.b32 %r50, %r22, %r47; -; CHECK-NEXT: or.b32 %r51, %r20, %r46; -; CHECK-NEXT: or.b32 %r52, %r18, %r45; -; CHECK-NEXT: or.b32 %r53, %r16, %r44; -; CHECK-NEXT: or.b32 %r54, %r14, %r43; -; CHECK-NEXT: or.b32 %r55, %r12, %r42; -; CHECK-NEXT: or.b32 %r56, %r10, %r41; -; CHECK-NEXT: mov.b32 {%rs49, %rs50}, %r56; -; CHECK-NEXT: mov.b32 {%rs51, %rs52}, %r55; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs51, %rs52, %rs49, %rs50}; -; CHECK-NEXT: mov.b32 {%rs53, %rs54}, %r54; -; CHECK-NEXT: mov.b32 {%rs55, %rs56}, %r53; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs55, %rs56, %rs53, %rs54}; -; CHECK-NEXT: mov.b32 {%rs57, %rs58}, %r52; -; CHECK-NEXT: mov.b32 {%rs59, %rs60}, %r51; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs59, %rs60, %rs57, %rs58}; -; CHECK-NEXT: mov.b32 {%rs61, %rs62}, %r50; -; CHECK-NEXT: mov.b32 {%rs63, %rs64}, %r49; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs63, %rs64, %rs61, %rs62}; +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v16i8_param_1]; +; CHECK-NEXT: ld.param.v4.u32 {%r9, %r10, %r11, %r12}, [out_v16i8_param_2]; +; CHECK-NEXT: ld.param.v4.u32 {%r17, %r18, %r19, %r20}, [out_v16i8_param_0]; +; CHECK-NEXT: and.b32 %r21, %r17, %r9; +; CHECK-NEXT: and.b32 %r23, %r18, %r10; +; CHECK-NEXT: and.b32 %r25, %r19, %r11; +; CHECK-NEXT: and.b32 %r27, %r20, %r12; +; CHECK-NEXT: mov.u16 %rs1, -1; +; CHECK-NEXT: mov.b32 %r29, {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: xor.b32 %r30, %r12, %r29; +; CHECK-NEXT: xor.b32 %r31, %r11, %r29; +; CHECK-NEXT: xor.b32 %r32, %r10, %r29; +; CHECK-NEXT: xor.b32 %r33, %r9, %r29; +; CHECK-NEXT: and.b32 %r34, %r1, %r33; +; CHECK-NEXT: and.b32 %r35, %r2, %r32; +; CHECK-NEXT: and.b32 %r36, %r3, %r31; +; CHECK-NEXT: and.b32 %r37, %r4, %r30; +; CHECK-NEXT: or.b32 %r38, %r27, %r37; +; CHECK-NEXT: or.b32 %r39, %r25, %r36; +; CHECK-NEXT: or.b32 %r40, %r23, %r35; +; CHECK-NEXT: or.b32 %r41, %r21, %r34; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r41, %r40, %r39, %r38}; ; CHECK-NEXT: ret; %mx = and <16 x i8> %x, %mask %notmask = xor <16 x i8> %mask, @@ -684,44 +512,16 @@ define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: in_v4i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 
__local_depot18[4]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-NEXT: .reg .b32 %r<8>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot18; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [in_v4i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [in_v4i8_param_1]; -; CHECK-NEXT: mov.b32 %r3, {%rs7, %rs8}; -; CHECK-NEXT: xor.b32 %r4, %r2, %r3; -; CHECK-NEXT: mov.b32 %r5, {%rs5, %rs6}; -; CHECK-NEXT: xor.b32 %r6, %r1, %r5; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [in_v4i8_param_2]; -; CHECK-NEXT: mov.b32 %r7, {%rs9, %rs10}; -; CHECK-NEXT: and.b32 %r8, %r6, %r7; -; CHECK-NEXT: mov.b32 %r9, {%rs11, %rs12}; -; CHECK-NEXT: and.b32 %r10, %r4, %r9; -; CHECK-NEXT: xor.b32 %r11, %r10, %r3; -; CHECK-NEXT: mov.b32 {%rs13, %rs14}, %r11; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs13, %rs14}; -; CHECK-NEXT: xor.b32 %r12, %r8, %r5; -; CHECK-NEXT: mov.b32 {%rs15, %rs16}, %r12; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs15, %rs16}; -; CHECK-NEXT: ld.u16 %r13, [%SP+0]; -; CHECK-NEXT: shl.b32 %r14, %r13, 16; -; CHECK-NEXT: ld.u16 %r15, [%SP+2]; -; CHECK-NEXT: or.b32 %r16, %r15, %r14; -; CHECK-NEXT: shr.u32 %r17, %r16, 8; -; CHECK-NEXT: cvt.u16.u32 %rs17, %r17; -; CHECK-NEXT: cvt.u16.u32 %rs18, %r13; -; CHECK-NEXT: bfe.s32 %r18, %r13, 8, 8; -; CHECK-NEXT: cvt.u16.u32 %rs19, %r18; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs15, %rs17, %rs18, %rs19}; +; CHECK-NEXT: ld.param.u32 %r1, [in_v4i8_param_0]; +; CHECK-NEXT: ld.param.u32 %r2, [in_v4i8_param_1]; +; CHECK-NEXT: xor.b32 %r3, %r1, %r2; +; CHECK-NEXT: ld.param.u32 %r4, [in_v4i8_param_2]; +; CHECK-NEXT: and.b32 %r5, %r3, %r4; +; CHECK-NEXT: xor.b32 %r6, %r5, %r2; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6; ; CHECK-NEXT: ret; %n0 = xor <4 x i8> %x, %y %n1 = and <4 x i8> %n0, %mask @@ -776,80 +576,19 @@ define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: in_v8i8( ; CHECK: { -; CHECK-NEXT: .local .align 2 .b8 __local_depot21[8]; -; CHECK-NEXT: .reg .b64 %SP; -; CHECK-NEXT: .reg .b64 %SPL; -; CHECK-NEXT: .reg .b16 %rs<40>; -; CHECK-NEXT: .reg .b32 %r<34>; -; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-NEXT: .reg .b32 %r<15>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %SPL, __local_depot21; -; CHECK-NEXT: cvta.local.u64 %SP, %SPL; -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [in_v8i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [in_v8i8_param_0]; -; CHECK-NEXT: mov.b32 %r3, {%rs5, %rs6}; -; CHECK-NEXT: mov.b32 %r4, {%rs7, %rs8}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [in_v8i8_param_1]; -; CHECK-NEXT: mov.b32 %r5, {%rs11, %rs12}; -; CHECK-NEXT: xor.b32 %r6, %r4, %r5; -; CHECK-NEXT: mov.b32 %r7, {%rs9, %rs10}; -; CHECK-NEXT: xor.b32 %r8, %r3, %r7; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [in_v8i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r9, {%rs15, %rs16}; -; CHECK-NEXT: xor.b32 %r10, %r2, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs13, %rs14}; -; CHECK-NEXT: xor.b32 %r12, %r1, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [in_v8i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r13, {%rs17, 
%rs18}; -; CHECK-NEXT: and.b32 %r14, %r12, %r13; -; CHECK-NEXT: mov.b32 %r15, {%rs19, %rs20}; -; CHECK-NEXT: and.b32 %r16, %r10, %r15; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [in_v8i8_param_2]; -; CHECK-NEXT: mov.b32 %r17, {%rs21, %rs22}; -; CHECK-NEXT: and.b32 %r18, %r8, %r17; -; CHECK-NEXT: mov.b32 %r19, {%rs23, %rs24}; -; CHECK-NEXT: and.b32 %r20, %r6, %r19; -; CHECK-NEXT: xor.b32 %r21, %r20, %r5; -; CHECK-NEXT: mov.b32 {%rs25, %rs26}, %r21; -; CHECK-NEXT: st.v2.u8 [%SP+0], {%rs25, %rs26}; -; CHECK-NEXT: xor.b32 %r22, %r18, %r7; -; CHECK-NEXT: mov.b32 {%rs27, %rs28}, %r22; -; CHECK-NEXT: st.v2.u8 [%SP+2], {%rs27, %rs28}; -; CHECK-NEXT: xor.b32 %r23, %r16, %r9; -; CHECK-NEXT: mov.b32 {%rs29, %rs30}, %r23; -; CHECK-NEXT: st.v2.u8 [%SP+4], {%rs29, %rs30}; -; CHECK-NEXT: xor.b32 %r24, %r14, %r11; -; CHECK-NEXT: mov.b32 {%rs31, %rs32}, %r24; -; CHECK-NEXT: st.v2.u8 [%SP+6], {%rs31, %rs32}; -; CHECK-NEXT: ld.u16 %r25, [%SP+0]; -; CHECK-NEXT: shl.b32 %r26, %r25, 16; -; CHECK-NEXT: ld.u16 %r27, [%SP+2]; -; CHECK-NEXT: or.b32 %r28, %r27, %r26; -; CHECK-NEXT: cvt.u64.u32 %rd1, %r28; -; CHECK-NEXT: ld.u16 %r29, [%SP+4]; -; CHECK-NEXT: shl.b32 %r30, %r29, 16; -; CHECK-NEXT: ld.u16 %r31, [%SP+6]; -; CHECK-NEXT: or.b32 %r32, %r31, %r30; -; CHECK-NEXT: cvt.u64.u32 %rd2, %r32; -; CHECK-NEXT: shl.b64 %rd3, %rd2, 32; -; CHECK-NEXT: or.b64 %rd4, %rd1, %rd3; -; CHECK-NEXT: shr.u32 %r33, %r32, 8; -; CHECK-NEXT: shr.u64 %rd5, %rd4, 24; -; CHECK-NEXT: cvt.u16.u64 %rs33, %rd5; -; CHECK-NEXT: shr.u64 %rd6, %rd1, 16; -; CHECK-NEXT: cvt.u16.u64 %rs34, %rd6; -; CHECK-NEXT: shr.u64 %rd7, %rd1, 8; -; CHECK-NEXT: cvt.u16.u64 %rs35, %rd7; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs27, %rs35, %rs34, %rs33}; -; CHECK-NEXT: cvt.u16.u32 %rs36, %r33; -; CHECK-NEXT: bfe.s64 %rd8, %rd2, 24, 8; -; CHECK-NEXT: cvt.u16.u64 %rs37, %rd8; -; CHECK-NEXT: cvt.u16.u32 %rs38, %r29; -; CHECK-NEXT: cvt.u16.u32 %rs39, %r31; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs39, %rs36, %rs38, %rs37}; +; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [in_v8i8_param_0]; +; CHECK-NEXT: ld.param.v2.u32 {%r3, %r4}, [in_v8i8_param_1]; +; CHECK-NEXT: ld.param.v2.u32 {%r5, %r6}, [in_v8i8_param_2]; +; CHECK-NEXT: xor.b32 %r7, %r2, %r4; +; CHECK-NEXT: and.b32 %r8, %r7, %r6; +; CHECK-NEXT: xor.b32 %r9, %r8, %r4; +; CHECK-NEXT: xor.b32 %r11, %r1, %r3; +; CHECK-NEXT: and.b32 %r12, %r11, %r5; +; CHECK-NEXT: xor.b32 %r13, %r12, %r3; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r13, %r9}; ; CHECK-NEXT: ret; %n0 = xor <8 x i8> %x, %y %n1 = and <8 x i8> %n0, %mask @@ -930,82 +669,25 @@ define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: in_v16i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<65>; -; CHECK-NEXT: .reg .b32 %r<49>; +; CHECK-NEXT: .reg .b32 %r<29>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [in_v16i8_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; -; CHECK-NEXT: mov.b32 %r2, {%rs1, %rs2}; -; CHECK-NEXT: ld.param.v4.u8 {%rs5, %rs6, %rs7, %rs8}, [in_v16i8_param_0+4]; -; CHECK-NEXT: mov.b32 %r3, {%rs7, %rs8}; -; CHECK-NEXT: mov.b32 %r4, {%rs5, %rs6}; -; CHECK-NEXT: ld.param.v4.u8 {%rs9, %rs10, %rs11, %rs12}, [in_v16i8_param_0+8]; -; CHECK-NEXT: mov.b32 %r5, {%rs11, %rs12}; -; CHECK-NEXT: mov.b32 %r6, {%rs9, %rs10}; -; CHECK-NEXT: ld.param.v4.u8 {%rs13, %rs14, %rs15, %rs16}, [in_v16i8_param_0+12]; -; CHECK-NEXT: mov.b32 %r7, {%rs15, %rs16}; -; 
CHECK-NEXT: mov.b32 %r8, {%rs13, %rs14}; -; CHECK-NEXT: ld.param.v4.u8 {%rs17, %rs18, %rs19, %rs20}, [in_v16i8_param_1+12]; -; CHECK-NEXT: mov.b32 %r9, {%rs17, %rs18}; -; CHECK-NEXT: xor.b32 %r10, %r8, %r9; -; CHECK-NEXT: mov.b32 %r11, {%rs19, %rs20}; -; CHECK-NEXT: xor.b32 %r12, %r7, %r11; -; CHECK-NEXT: ld.param.v4.u8 {%rs21, %rs22, %rs23, %rs24}, [in_v16i8_param_1+8]; -; CHECK-NEXT: mov.b32 %r13, {%rs21, %rs22}; -; CHECK-NEXT: xor.b32 %r14, %r6, %r13; -; CHECK-NEXT: mov.b32 %r15, {%rs23, %rs24}; -; CHECK-NEXT: xor.b32 %r16, %r5, %r15; -; CHECK-NEXT: ld.param.v4.u8 {%rs25, %rs26, %rs27, %rs28}, [in_v16i8_param_1+4]; -; CHECK-NEXT: mov.b32 %r17, {%rs25, %rs26}; -; CHECK-NEXT: xor.b32 %r18, %r4, %r17; -; CHECK-NEXT: mov.b32 %r19, {%rs27, %rs28}; -; CHECK-NEXT: xor.b32 %r20, %r3, %r19; -; CHECK-NEXT: ld.param.v4.u8 {%rs29, %rs30, %rs31, %rs32}, [in_v16i8_param_1]; -; CHECK-NEXT: mov.b32 %r21, {%rs29, %rs30}; -; CHECK-NEXT: xor.b32 %r22, %r2, %r21; -; CHECK-NEXT: mov.b32 %r23, {%rs31, %rs32}; -; CHECK-NEXT: xor.b32 %r24, %r1, %r23; -; CHECK-NEXT: ld.param.v4.u8 {%rs33, %rs34, %rs35, %rs36}, [in_v16i8_param_2]; -; CHECK-NEXT: mov.b32 %r25, {%rs35, %rs36}; -; CHECK-NEXT: and.b32 %r26, %r24, %r25; -; CHECK-NEXT: mov.b32 %r27, {%rs33, %rs34}; -; CHECK-NEXT: and.b32 %r28, %r22, %r27; -; CHECK-NEXT: ld.param.v4.u8 {%rs37, %rs38, %rs39, %rs40}, [in_v16i8_param_2+4]; -; CHECK-NEXT: mov.b32 %r29, {%rs39, %rs40}; -; CHECK-NEXT: and.b32 %r30, %r20, %r29; -; CHECK-NEXT: mov.b32 %r31, {%rs37, %rs38}; -; CHECK-NEXT: and.b32 %r32, %r18, %r31; -; CHECK-NEXT: ld.param.v4.u8 {%rs41, %rs42, %rs43, %rs44}, [in_v16i8_param_2+8]; -; CHECK-NEXT: mov.b32 %r33, {%rs43, %rs44}; -; CHECK-NEXT: and.b32 %r34, %r16, %r33; -; CHECK-NEXT: mov.b32 %r35, {%rs41, %rs42}; -; CHECK-NEXT: and.b32 %r36, %r14, %r35; -; CHECK-NEXT: ld.param.v4.u8 {%rs45, %rs46, %rs47, %rs48}, [in_v16i8_param_2+12]; -; CHECK-NEXT: mov.b32 %r37, {%rs47, %rs48}; -; CHECK-NEXT: and.b32 %r38, %r12, %r37; -; CHECK-NEXT: mov.b32 %r39, {%rs45, %rs46}; -; CHECK-NEXT: and.b32 %r40, %r10, %r39; -; CHECK-NEXT: xor.b32 %r41, %r40, %r9; -; CHECK-NEXT: xor.b32 %r42, %r38, %r11; -; CHECK-NEXT: xor.b32 %r43, %r36, %r13; -; CHECK-NEXT: xor.b32 %r44, %r34, %r15; -; CHECK-NEXT: xor.b32 %r45, %r32, %r17; -; CHECK-NEXT: xor.b32 %r46, %r30, %r19; -; CHECK-NEXT: xor.b32 %r47, %r28, %r21; -; CHECK-NEXT: xor.b32 %r48, %r26, %r23; -; CHECK-NEXT: mov.b32 {%rs49, %rs50}, %r48; -; CHECK-NEXT: mov.b32 {%rs51, %rs52}, %r47; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs51, %rs52, %rs49, %rs50}; -; CHECK-NEXT: mov.b32 {%rs53, %rs54}, %r46; -; CHECK-NEXT: mov.b32 {%rs55, %rs56}, %r45; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs55, %rs56, %rs53, %rs54}; -; CHECK-NEXT: mov.b32 {%rs57, %rs58}, %r44; -; CHECK-NEXT: mov.b32 {%rs59, %rs60}, %r43; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs59, %rs60, %rs57, %rs58}; -; CHECK-NEXT: mov.b32 {%rs61, %rs62}, %r42; -; CHECK-NEXT: mov.b32 {%rs63, %rs64}, %r41; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs63, %rs64, %rs61, %rs62}; +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v16i8_param_0]; +; CHECK-NEXT: ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v16i8_param_1]; +; CHECK-NEXT: xor.b32 %r9, %r4, %r8; +; CHECK-NEXT: xor.b32 %r10, %r3, %r7; +; CHECK-NEXT: xor.b32 %r11, %r2, %r6; +; CHECK-NEXT: xor.b32 %r12, %r1, %r5; +; CHECK-NEXT: ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v16i8_param_2]; +; CHECK-NEXT: and.b32 %r17, %r12, %r13; +; CHECK-NEXT: and.b32 %r18, %r11, %r14; +; CHECK-NEXT: and.b32 %r19, 
%r10, %r15;
+; CHECK-NEXT: and.b32 %r20, %r9, %r16;
+; CHECK-NEXT: xor.b32 %r21, %r20, %r8;
+; CHECK-NEXT: xor.b32 %r23, %r19, %r7;
+; CHECK-NEXT: xor.b32 %r25, %r18, %r6;
+; CHECK-NEXT: xor.b32 %r27, %r17, %r5;
+; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r27, %r25, %r23, %r21};
 ; CHECK-NEXT: ret;
 %n0 = xor <16 x i8> %x, %y
 %n1 = and <16 x i8> %n0, %mask
diff --git a/llvm/test/CodeGen/NVPTX/vec8.ll b/llvm/test/CodeGen/NVPTX/vec8.ll
index 092607462f3329b..8333a9b935d6af8 100644
--- a/llvm/test/CodeGen/NVPTX/vec8.ll
+++ b/llvm/test/CodeGen/NVPTX/vec8.ll
@@ -5,10 +5,9 @@ target triple = "nvptx-unknown-cuda"
 
 ; CHECK: .visible .func foo
 define void @foo(<8 x i8> %a, ptr %b) {
-; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [foo_param_0]
-; CHECK-DAG: ld.param.v4.u8 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [foo_param_0+4]
+; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo_param_0]
 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9+]]], [foo_param_1]
-; CHECK: add.s16 [[T:%rs[0-9+]]], [[E1]], [[E6]];
+; CHECK: add.s16 [[T:%rs[0-9+]]],
 ; CHECK: st.u8 [%[[B]]], [[T]];
 %t0 = extractelement <8 x i8> %a, i32 1
 %t1 = extractelement <8 x i8> %a, i32 6

>From bda4bd36ded20dba4ac89824a42b8a2017c41247 Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Mon, 2 Oct 2023 18:05:42 -0700
Subject: [PATCH 2/9] More work on fleshing out extractelt/build_vector for v4i8

Verified that NVPTX tests pass, with ptxas being able to compile the PTX
produced by the llc tests.
---
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   | 18 ++++
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h     |  1 +
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   | 71 +++++++-------
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td       | 64 +++++++++----
 llvm/test/CodeGen/NVPTX/extractelement.ll     | 55 ++++++++++-
 llvm/test/CodeGen/NVPTX/i16x2-instructions.ll |  2 +-
 ...unfold-masked-merge-vector-variablemask.ll | 95 ++++++++-----------
 7 files changed, 196 insertions(+), 110 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 1daa4971981c25c..c3bcf8f05a278ad 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTXUtilities.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
@@ -3569,6 +3570,23 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
 }
 
+bool NVPTXDAGToDAGISel::SelectExtractEltFromV4I8(SDValue N, SDValue &V,
+                                                 SDValue &BitOffset) {
+  SDValue Vector = N->getOperand(0);
+  if (!(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        Vector->getValueType(0) == MVT::v4i8))
+    return false;
+
+  if (const ConstantSDNode *IdxConst =
+          dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+    V = Vector;
+    BitOffset = CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8,
+                                          SDLoc(N), MVT::i32);
+    return true;
+  }
+  return false;
+}
+
 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                  unsigned int spN) const {
   const Value *Src = nullptr;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 06922331f5e2059..34b5dd449ce086f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -95,6 +95,7 @@ class
LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { SDValue &Offset); bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectExtractEltFromV4I8(SDValue N, SDValue &Value, SDValue &Idx); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7880d70fb2c6fea..66dcdb53b136b96 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -497,6 +497,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand); + // TODO: we should eventually lower it as PRMT instruction. + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom); + // Operations not directly supported by NVPTX. for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, MVT::f64, @@ -2156,45 +2160,47 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return DAG.getBuildVector(Node->getValueType(0), dl, Ops); } -// We can init constant f16x2 with a single .b32 move. Normally it +// We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it // would get lowered as two constant loads and vector-packing move. -// mov.b16 %h1, 0x4000; -// mov.b16 %h2, 0x3C00; -// mov.b32 %hh2, {%h2, %h1}; // Instead we want just a constant move: // mov.b32 %hh2, 0x40003C00 -// -// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0 -// generates good SASS in both cases. SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); - if (!(Isv2x16VT(VT))) + if (!(Isv2x16VT(VT) || VT == MVT::v4i8)) + return Op; + + if (!llvm::all_of(Op->ops(), [](SDValue Operand) { + return Operand->isUndef() || isa(Operand) || + isa(Operand); + })) return Op; - APInt E0; - APInt E1; - if (VT == MVT::v2f16 || VT == MVT::v2bf16) { - if (!(isa(Op->getOperand(0)) && - isa(Op->getOperand(1)))) - return Op; - - E0 = cast(Op->getOperand(0)) - ->getValueAPF() - .bitcastToAPInt(); - E1 = cast(Op->getOperand(1)) - ->getValueAPF() - .bitcastToAPInt(); - } else { - assert(VT == MVT::v2i16); - if (!(isa(Op->getOperand(0)) && - isa(Op->getOperand(1)))) - return Op; - E0 = cast(Op->getOperand(0))->getAPIntValue(); - E1 = cast(Op->getOperand(1))->getAPIntValue(); + // Get value or the Nth operand as an APInt(32). Undef values treated as 0. 
+ auto GetOperand = [](SDValue Op, int N) -> APInt { + const SDValue &Operand = Op->getOperand(N); + EVT VT = Op->getValueType(0); + if (Operand->isUndef()) + return APInt(32, 0); + APInt Value; + if (VT == MVT::v2f16 || VT == MVT::v2bf16) + Value = cast(Operand)->getValueAPF().bitcastToAPInt(); + else if (VT == MVT::v2i16 || VT == MVT::v4i8) + Value = cast(Operand)->getAPIntValue(); + else + llvm_unreachable("Unsupported type"); + return Value.zext(32); + }; + APInt Value; + if (Isv2x16VT(VT)) { + Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16); + } else if (VT == MVT::v4i8) { + Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) | + GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24); + } else { + llvm_unreachable("Unsupported type"); } - SDValue Const = - DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32); + SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32); return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const); } @@ -5262,11 +5268,12 @@ static SDValue PerformEXTRACTCombine(SDNode *N, SDValue Vector = N->getOperand(0); EVT VectorVT = Vector.getValueType(); if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && - IsPTXVectorType(VectorVT.getSimpleVT()) && VectorVT != MVT::v4i8) + IsPTXVectorType(VectorVT.getSimpleVT())) return SDValue(); // Native vector loads already combine nicely w/ // extract_vector_elt, except for v4i8. // Don't mess with singletons or v2*16 types, we already handle them OK. - if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT)) + if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) || + VectorVT == MVT::v4i8) return SDValue(); uint64_t VectorBits = VectorVT.getSizeInBits(); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 047161fb2027dee..307963aaa800b88 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1738,7 +1738,7 @@ def FUNSHFRCLAMP : // restriction in PTX? // // dest and src may be int32 or int64, but start and end are always int32. 
-multiclass BFX { +multiclass BFE { def rrr : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c), @@ -1752,17 +1752,29 @@ multiclass BFX { (ins RC:$a, i32imm:$b, i32imm:$c), !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; } +multiclass BFI { + def rrr + : NVPTXInst<(outs RC:$f), + (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; + def rri + : NVPTXInst<(outs RC:$f), + (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; + def rii + : NVPTXInst<(outs RC:$f), + (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; +} let hasSideEffects = false in { - defm BFE_S32 : BFX<"bfe.s32", Int32Regs>; - defm BFE_U32 : BFX<"bfe.u32", Int32Regs>; - defm BFE_S64 : BFX<"bfe.s64", Int64Regs>; - defm BFE_U64 : BFX<"bfe.u64", Int64Regs>; - - defm BFI_S32 : BFX<"bfi.s32", Int32Regs>; - defm BFI_U32 : BFX<"bfi.u32", Int32Regs>; - defm BFI_S64 : BFX<"bfi.s64", Int64Regs>; - defm BFI_U64 : BFX<"bfi.u64", Int64Regs>; + defm BFE_S32 : BFE<"bfe.s32", Int32Regs>; + defm BFE_U32 : BFE<"bfe.u32", Int32Regs>; + defm BFE_S64 : BFE<"bfe.s64", Int64Regs>; + defm BFE_U64 : BFE<"bfe.u64", Int64Regs>; + + defm BFI_B32 : BFI<"bfi.b32", Int32Regs>; + defm BFI_B64 : BFI<"bfi.b64", Int64Regs>; } // Common byte extraction patterns @@ -1782,6 +1794,24 @@ def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)), (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>; +def ExtractFromV4I8 : ComplexPattern; +def: Pat<(i32 (sext_inreg (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), i8)), + (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8)>; +def: Pat<(i32 (and (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), 255)), + (BFE_U32rii Int32Regs:$src, imm:$bitidx, 8)>; +def: Pat<(i16 (sext_inreg (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), i8)), + (CVT_s8_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; +def: Pat<(ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), + (CVT_s16_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; + + +def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), + (i16 Int16Regs:$c), (i16 Int16Regs:$d))), + (BFI_B32rii + (BFI_B32rii (CVT_u32_u16 Int16Regs:$d, CvtNONE), (CVT_u32_u16 Int16Regs:$c, CvtNONE), 8, 8), + (BFI_B32rii (CVT_u32_u16 Int16Regs:$b, CvtNONE), (CVT_u32_u16 Int16Regs:$a, CvtNONE), 8, 8), + 16, 16)>; + //----------------------------------- // Comparison instructions (setp, set) //----------------------------------- @@ -3293,10 +3323,6 @@ let hasSideEffects = false in { (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4), "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; - def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2, - Int16Regs:$s3, Int16Regs:$s4), - "mov.b32 \t$d, {{$s1, $s2, $s3, $s4}};", []>; def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2), "mov.b32 \t$d, {{$s1, $s2}};", []>; @@ -3312,10 +3338,6 @@ let hasSideEffects = false in { Int16Regs:$d3, Int16Regs:$d4), (ins Int64Regs:$s), "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; - def I32toV4I8 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, - Int16Regs:$d3, Int16Regs:$d4), - (ins Int32Regs:$s), - "mov.b32 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), (ins Int32Regs:$s), "mov.b32 \t{{$d1, 
$d2}}, $s;", []>; @@ -3351,6 +3373,9 @@ def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))), def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))), (I64toI32H Int64Regs:$s)>; +def: Pat<(i32 (sext (extractelt (v2i16 Int32Regs:$src), 0))), + (CVT_INREG_s32_s16 Int32Regs:$src)>; + foreach vt = [v2f16, v2bf16, v2i16] in { def : Pat<(extractelt (vt Int32Regs:$src), 0), (I32toI16L Int32Regs:$src)>; @@ -3363,9 +3388,6 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; -def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), - (i16 Int16Regs:$c), (i16 Int16Regs:$d))), - (V4I8toI32 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c, Int16Regs:$d)>; // Count leading zeros let hasSideEffects = false in { diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll index da07f973501c855..ed7dd45ab7b4502 100644 --- a/llvm/test/CodeGen/NVPTX/extractelement.ll +++ b/llvm/test/CodeGen/NVPTX/extractelement.ll @@ -18,7 +18,8 @@ define i16 @test_v2i8(i16 %a) { ; CHECK-LABEL: test_v4i8 ; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_param_0]; -; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9+]]], [[R]]; +; CHECK-DAG: bfe.s32 [[R0:%r[0-9+]]], [[R]], 0, 8; +; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9+]]], [[R0]]; ; CHECK-DAG: bfe.s32 [[R1:%r[0-9+]]], [[R]], 8, 8; ; CHECK-DAG: cvt.s8.s32 [[E1:%rs[0-9+]]], [[R1]]; ; CHECK-DAG: bfe.s32 [[R2:%r[0-9+]]], [[R]], 16, 8; @@ -41,6 +42,58 @@ define i16 @test_v4i8(i32 %a) { ret i16 %r } +; CHECK-LABEL: test_v4i8_s32 +; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_s32_param_0]; +; CHECK-DAG: bfe.s32 [[R0:%r[0-9+]]], [[R]], 0, 8; +; CHECK-DAG: bfe.s32 [[R1:%r[0-9+]]], [[R]], 8, 8; +; CHECK-DAG: bfe.s32 [[R2:%r[0-9+]]], [[R]], 16, 8; +; CHECK-DAG: bfe.s32 [[R3:%r[0-9+]]], [[R]], 24, 8; +; CHECK-DAG: add.s32 [[R01:%r[0-9+]]], [[R0]], [[R1]] +; CHECK-DAG: add.s32 [[R23:%r[0-9+]]], [[R2]], [[R3]] +; CHECK-DAG: add.s32 [[R0123:%r[0-9+]]], [[R01]], [[R23]] +define i32 @test_v4i8_s32(i32 %a) { + %v = bitcast i32 %a to <4 x i8> + %r0 = extractelement <4 x i8> %v, i64 0 + %r1 = extractelement <4 x i8> %v, i64 1 + %r2 = extractelement <4 x i8> %v, i64 2 + %r3 = extractelement <4 x i8> %v, i64 3 + %r0i = sext i8 %r0 to i32 + %r1i = sext i8 %r1 to i32 + %r2i = sext i8 %r2 to i32 + %r3i = sext i8 %r3 to i32 + %r01 = add i32 %r0i, %r1i + %r23 = add i32 %r2i, %r3i + %r = add i32 %r01, %r23 + ret i32 %r +} + +; CHECK-LABEL: test_v4i8_u32 +; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_u32_param_0]; +; CHECK-DAG: bfe.u32 [[R0:%r[0-9+]]], [[R]], 0, 8; +; CHECK-DAG: bfe.u32 [[R1:%r[0-9+]]], [[R]], 8, 8; +; CHECK-DAG: bfe.u32 [[R2:%r[0-9+]]], [[R]], 16, 8; +; CHECK-DAG: bfe.u32 [[R3:%r[0-9+]]], [[R]], 24, 8; +; CHECK-DAG: add.s32 [[R01:%r[0-9+]]], [[R0]], [[R1]] +; CHECK-DAG: add.s32 [[R23:%r[0-9+]]], [[R2]], [[R3]] +; CHECK-DAG: add.s32 [[R0123:%r[0-9+]]], [[R01]], [[R23]] +define i32 @test_v4i8_u32(i32 %a) { + %v = bitcast i32 %a to <4 x i8> + %r0 = extractelement <4 x i8> %v, i64 0 + %r1 = extractelement <4 x i8> %v, i64 1 + %r2 = extractelement <4 x i8> %v, i64 2 + %r3 = extractelement <4 x i8> %v, i64 3 + %r0i = zext i8 %r0 to i32 + %r1i = zext i8 %r1 to i32 + %r2i = zext i8 %r2 to i32 + %r3i = zext i8 %r3 to i32 + %r01 = add i32 %r0i, %r1i + %r23 = add i32 %r2i, %r3i + %r = add i32 %r01, %r23 + ret i32 %r +} + + + ; CHECK-LABEL: test_v8i8 ; CHECK: ld.param.u64 [[R:%rd[0-9+]]], 
[test_v8i8_param_0]; ; CHECK-DAG: cvt.s8.s64 [[E0:%rs[0-9+]]], [[R]]; diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index 5a22bbcf7416c17..684e4bc38d83de1 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -5,7 +5,7 @@ ; RUN: %if ptxas %{ \ ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \ ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ -; RUN: | %ptxas-verify -arch=sm_53 \ +; RUN: | %ptxas-verify -arch=sm_90 \ ; RUN: %} ; ## No support for i16x2 instructions ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \ diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll index 74087be4834d966..97b1e38a3388413 100644 --- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll @@ -60,20 +60,17 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<11>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_param_2]; ; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_param_1]; ; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_param_0]; ; CHECK-NEXT: and.b32 %r5, %r4, %r1; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: xor.b32 %r8, %r1, %r7; -; CHECK-NEXT: and.b32 %r9, %r3, %r8; -; CHECK-NEXT: or.b32 %r10, %r5, %r9; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; +; CHECK-NEXT: xor.b32 %r7, %r1, -1; +; CHECK-NEXT: and.b32 %r8, %r3, %r7; +; CHECK-NEXT: or.b32 %r9, %r5, %r8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -85,20 +82,17 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: out_v4i8_undef( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; -; CHECK-NEXT: .reg .b32 %r<11>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [out_v4i8_undef_param_2]; ; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_undef_param_1]; ; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_undef_param_0]; ; CHECK-NEXT: and.b32 %r5, %r4, %r1; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r7, {%rs1, %rs1, %rs2, %rs1}; -; CHECK-NEXT: xor.b32 %r8, %r1, %r7; -; CHECK-NEXT: and.b32 %r9, %r3, %r8; -; CHECK-NEXT: or.b32 %r10, %r5, %r9; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r10; +; CHECK-NEXT: xor.b32 %r7, %r1, -1; +; CHECK-NEXT: and.b32 %r8, %r3, %r7; +; CHECK-NEXT: or.b32 %r9, %r5, %r8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9; ; CHECK-NEXT: ret; %mx = and <4 x i8> %x, %mask %notmask = xor <4 x i8> %mask, @@ -158,8 +152,7 @@ define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwin define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: out_v8i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<22>; +; CHECK-NEXT: .reg .b32 %r<21>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.u32 
{%r1, %r2}, [out_v8i8_param_1]; @@ -167,15 +160,13 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-NEXT: ld.param.v2.u32 {%r9, %r10}, [out_v8i8_param_0]; ; CHECK-NEXT: and.b32 %r11, %r9, %r5; ; CHECK-NEXT: and.b32 %r13, %r10, %r6; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r15, {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: xor.b32 %r16, %r6, %r15; -; CHECK-NEXT: xor.b32 %r17, %r5, %r15; -; CHECK-NEXT: and.b32 %r18, %r1, %r17; -; CHECK-NEXT: and.b32 %r19, %r2, %r16; -; CHECK-NEXT: or.b32 %r20, %r13, %r19; -; CHECK-NEXT: or.b32 %r21, %r11, %r18; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r21, %r20}; +; CHECK-NEXT: xor.b32 %r15, %r6, -1; +; CHECK-NEXT: xor.b32 %r16, %r5, -1; +; CHECK-NEXT: and.b32 %r17, %r1, %r16; +; CHECK-NEXT: and.b32 %r18, %r2, %r15; +; CHECK-NEXT: or.b32 %r19, %r13, %r18; +; CHECK-NEXT: or.b32 %r20, %r11, %r17; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19}; ; CHECK-NEXT: ret; %mx = and <8 x i8> %x, %mask %notmask = xor <8 x i8> %mask, @@ -213,8 +204,7 @@ define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwin define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { ; CHECK-LABEL: out_v4i16_undef( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; -; CHECK-NEXT: .reg .b32 %r<22>; +; CHECK-NEXT: .reg .b32 %r<21>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [out_v4i16_undef_param_1]; @@ -222,15 +212,13 @@ define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) n ; CHECK-NEXT: ld.param.v2.u32 {%r9, %r10}, [out_v4i16_undef_param_0]; ; CHECK-NEXT: and.b32 %r11, %r9, %r5; ; CHECK-NEXT: and.b32 %r13, %r10, %r6; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r15, {%rs2, %rs1}; -; CHECK-NEXT: xor.b32 %r16, %r6, %r15; -; CHECK-NEXT: xor.b32 %r17, %r5, -1; -; CHECK-NEXT: and.b32 %r18, %r1, %r17; -; CHECK-NEXT: and.b32 %r19, %r2, %r16; -; CHECK-NEXT: or.b32 %r20, %r13, %r19; -; CHECK-NEXT: or.b32 %r21, %r11, %r18; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r21, %r20}; +; CHECK-NEXT: xor.b32 %r15, %r6, -65536; +; CHECK-NEXT: xor.b32 %r16, %r5, -1; +; CHECK-NEXT: and.b32 %r17, %r1, %r16; +; CHECK-NEXT: and.b32 %r18, %r2, %r15; +; CHECK-NEXT: or.b32 %r19, %r13, %r18; +; CHECK-NEXT: or.b32 %r20, %r11, %r17; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19}; ; CHECK-NEXT: ret; %mx = and <4 x i16> %x, %mask %notmask = xor <4 x i16> %mask, @@ -294,8 +282,7 @@ define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwin define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: out_v16i8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<42>; +; CHECK-NEXT: .reg .b32 %r<41>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v16i8_param_1]; @@ -305,21 +292,19 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin ; CHECK-NEXT: and.b32 %r23, %r18, %r10; ; CHECK-NEXT: and.b32 %r25, %r19, %r11; ; CHECK-NEXT: and.b32 %r27, %r20, %r12; -; CHECK-NEXT: mov.u16 %rs1, -1; -; CHECK-NEXT: mov.b32 %r29, {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: xor.b32 %r30, %r12, %r29; -; CHECK-NEXT: xor.b32 %r31, %r11, %r29; -; CHECK-NEXT: xor.b32 %r32, %r10, %r29; -; CHECK-NEXT: xor.b32 %r33, %r9, %r29; -; CHECK-NEXT: and.b32 %r34, %r1, %r33; -; CHECK-NEXT: and.b32 %r35, %r2, %r32; -; CHECK-NEXT: and.b32 %r36, %r3, %r31; -; CHECK-NEXT: and.b32 %r37, %r4, 
%r30;
-; CHECK-NEXT: or.b32 %r38, %r27, %r37;
-; CHECK-NEXT: or.b32 %r39, %r25, %r36;
-; CHECK-NEXT: or.b32 %r40, %r23, %r35;
-; CHECK-NEXT: or.b32 %r41, %r21, %r34;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r41, %r40, %r39, %r38};
+; CHECK-NEXT: xor.b32 %r29, %r12, -1;
+; CHECK-NEXT: xor.b32 %r30, %r11, -1;
+; CHECK-NEXT: xor.b32 %r31, %r10, -1;
+; CHECK-NEXT: xor.b32 %r32, %r9, -1;
+; CHECK-NEXT: and.b32 %r33, %r1, %r32;
+; CHECK-NEXT: and.b32 %r34, %r2, %r31;
+; CHECK-NEXT: and.b32 %r35, %r3, %r30;
+; CHECK-NEXT: and.b32 %r36, %r4, %r29;
+; CHECK-NEXT: or.b32 %r37, %r27, %r36;
+; CHECK-NEXT: or.b32 %r38, %r25, %r35;
+; CHECK-NEXT: or.b32 %r39, %r23, %r34;
+; CHECK-NEXT: or.b32 %r40, %r21, %r33;
+; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r40, %r39, %r38, %r37};
 ; CHECK-NEXT: ret;
 %mx = and <16 x i8> %x, %mask
 %notmask = xor <16 x i8> %mask,

>From e55bb97942124e2659f8132784131c74e4f6fd10 Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Tue, 3 Oct 2023 17:41:20 -0700
Subject: [PATCH 3/9] Down the rabbit hole we go.

To make things work consistently for v4i8, we need to implement other
vector ops.
---
 .../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp  |   31 +
 .../NVPTX/MCTargetDesc/NVPTXInstPrinter.h    |    2 +
 llvm/lib/Target/NVPTX/NVPTX.h                |   12 +
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp  |   11 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp  |  168 ++-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h    |    5 +
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td      |  168 ++-
 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 1237 +++++++++++++++++
 8 files changed, 1580 insertions(+), 54 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 5d27accdc198c1e..b7a20c351f5ff6f 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -309,3 +309,34 @@ void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
   const MCSymbol &Sym = cast<MCSymbolRefExpr>(Expr)->getSymbol();
   O << Sym.getName();
 }
+
+void NVPTXInstPrinter::printPrmtMode(const MCInst *MI, int OpNum,
+                                     raw_ostream &O, const char *Modifier) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  int64_t Imm = MO.getImm();
+
+  switch (Imm) {
+  default:
+    return;
+  case NVPTX::PTXPrmtMode::NONE:
+    break;
+  case NVPTX::PTXPrmtMode::F4E:
+    O << ".f4e";
+    break;
+  case NVPTX::PTXPrmtMode::B4E:
+    O << ".b4e";
+    break;
+  case NVPTX::PTXPrmtMode::RC8:
+    O << ".rc8";
+    break;
+  case NVPTX::PTXPrmtMode::ECL:
+    O << ".ecl";
+    break;
+  case NVPTX::PTXPrmtMode::ECR:
+    O << ".ecr";
+    break;
+  case NVPTX::PTXPrmtMode::RC16:
+    O << ".rc16";
+    break;
+  }
+}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index 49ad3f269229d5f..e6954f861cd10e2 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -47,6 +47,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
                  raw_ostream &O, const char *Modifier = nullptr);
   void printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O,
                        const char *Modifier = nullptr);
+  void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
+                     const char *Modifier = nullptr);
 };
 
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index c5816b9266dfd9e..f7c8da372cec88c 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -180,6 +180,18
@@ enum CmpMode { FTZ_FLAG = 0x100 }; } + +namespace PTXPrmtMode { +enum PrmtMode { + NONE, + F4E, + B4E, + RC8, + ECL, + ECR, + RC16, +}; +} } void initializeNVPTXDAGToDAGISelPass(PassRegistry &); } // namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c3bcf8f05a278ad..f442188610715ee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -3577,11 +3577,12 @@ bool NVPTXDAGToDAGISel::SelectExtractEltFromV4I8(SDValue N, SDValue &V, Vector->getValueType(0) == MVT::v4i8)) return false; - if (const ConstantSDNode *IdxConst = - dyn_cast(N->getOperand(1))) { - V = Vector; - BitOffset = CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, - SDLoc(N), MVT::i32); + SDLoc DL(N); + V = Vector; + SDValue Index = N->getOperand(1); + if (const ConstantSDNode *IdxConst = dyn_cast(Index)) { + BitOffset = + CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, DL, MVT::i32); return true; } return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 66dcdb53b136b96..b886b6e2ce5ddde 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -497,18 +497,31 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand); - // TODO: we should eventually lower it as PRMT instruction. - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); + // Only logical ops can be done on v4i8 directly, others must be done + // elementwise. + setOperationAction( + {ISD::ADD, ISD::MUL, ISD::ABS, ISD::SMIN, + ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::CTPOP, + ISD::CTLZ, ISD::ADD, ISD::SUB, ISD::MUL, + ISD::SHL, ISD::SREM, ISD::UREM, ISD::SDIV, + ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS, + ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, + ISD::UINT_TO_FP}, + MVT::v4i8, Expand); // Operations not directly supported by NVPTX. - for (MVT VT : - {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, MVT::f64, - MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64}) { + for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, + MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8, + MVT::i32, MVT::i64}) { setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BR_CC, VT, Expand); } + // Some SIGN_EXTEND_INREG can be done using cvt instruction. // For others we will expand to a SHL/SRA pair. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); @@ -682,7 +695,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // We have some custom DAG combine patterns for these nodes setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL, - ISD::SREM, ISD::UREM, ISD::EXTRACT_VECTOR_ELT}); + ISD::SREM, ISD::UREM, ISD::EXTRACT_VECTOR_ELT, + ISD::VSELECT}); // setcc for f16x2 and bf16x2 needs special handling to prevent // legalizer's attempt to scalarize it due to v2i1 not being legal. 
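// The hunk above keeps only the logical ops on v4i8 and expands the
// arithmetic ones element-wise. A minimal standalone C++ model of why (not
// part of the patch; pack4 is a hypothetical helper): a v4i8 lives in a
// single 32-bit register, so bitwise ops are already correct per lane, while
// a 32-bit add lets carries cross byte lanes and must be split per element.
#include <cassert>
#include <cstdint>

// Pack four i8 lanes into one 32-bit word, element 0 in the low byte.
static uint32_t pack4(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) {
  return uint32_t(b0) | uint32_t(b1) << 8 | uint32_t(b2) << 16 |
         uint32_t(b3) << 24;
}

int main() {
  // AND of the packed words equals the lane-wise AND, so ISD::AND is fine.
  assert((pack4(0x0F, 0xF0, 0xAA, 0x55) & pack4(0xFF, 0x0F, 0x0F, 0xFF)) ==
         pack4(0x0F, 0x00, 0x0A, 0x55));
  // A 32-bit add is not lane-wise: 0x80 + 0x80 carries into byte 1, which
  // is why ISD::ADD (and friends) are marked Expand above.
  assert(pack4(0x80, 0, 0, 0) + pack4(0x80, 0, 0, 0) == 0x100u);
  return 0;
}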
@@ -891,6 +905,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::FUN_SHFR_CLAMP"; case NVPTXISD::IMAD: return "NVPTXISD::IMAD"; + case NVPTXISD::BFE: + return "NVPTXISD::BFE"; + case NVPTXISD::BFI: + return "NVPTXISD::BFI"; + case NVPTXISD::PRMT: + return "NVPTXISD::PRMT"; case NVPTXISD::SETP_F16X2: return "NVPTXISD::SETP_F16X2"; case NVPTXISD::Dummy: @@ -2163,18 +2183,39 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it // would get lowered as two constant loads and vector-packing move. // Instead we want just a constant move: -// mov.b32 %hh2, 0x40003C00 +// mov.b32 %r2, 0x40003C00 SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); if (!(Isv2x16VT(VT) || VT == MVT::v4i8)) return Op; + SDLoc DL(Op); + if (!llvm::all_of(Op->ops(), [](SDValue Operand) { return Operand->isUndef() || isa(Operand) || isa(Operand); - })) + })) { + // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us + // to optimize calculation of constant parts. + if (VT == MVT::v4i8) { + SDValue C8 = DAG.getConstant(8, DL, MVT::i32); + SDValue E01 = DAG.getNode( + NVPTXISD::BFI, DL, MVT::i32, + DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32), + DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); + SDValue E012 = + DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, + DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), E01, + DAG.getConstant(16, DL, MVT::i32), C8); + SDValue E0123 = + DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, + DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), E012, + DAG.getConstant(24, DL, MVT::i32), C8); + return DAG.getNode(ISD::BITCAST, DL, VT, E0123); + } return Op; + } // Get value or the Nth operand as an APInt(32). Undef values treated as 0. auto GetOperand = [](SDValue Op, int N) -> APInt { @@ -2207,13 +2248,26 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue Index = Op->getOperand(1); + SDValue Vector = Op->getOperand(0); + SDLoc DL(Op); + EVT VectorVT = Vector.getValueType(); + + if (VectorVT == MVT::v4i8) { + SDValue BFE = + DAG.getNode(NVPTXISD::BFE, DL, MVT::i32, + {Vector, + DAG.getNode(ISD::MUL, DL, MVT::i32, + DAG.getZExtOrTrunc(Index, DL, MVT::i32), + DAG.getConstant(8, DL, MVT::i32)), + DAG.getConstant(8, DL, MVT::i32)}); + return DAG.getZExtOrTrunc(BFE, DL, Op->getValueType(0)); + } + // Constant index will be matched by tablegen. if (isa(Index.getNode())) return Op; // Extract individual elements and select one of them. 
- SDValue Vector = Op->getOperand(0); - EVT VectorVT = Vector.getValueType(); assert(Isv2x16VT(VectorVT) && "Unexpected vector type."); EVT EltVT = VectorVT.getVectorElementType(); @@ -2226,6 +2280,34 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, ISD::CondCode::SETEQ); } +SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Vector = Op->getOperand(0); + EVT VectorVT = Vector.getValueType(); + + if (VectorVT != MVT::v4i8) + return Op; + SDLoc DL(Op); + SDValue Value = Op->getOperand(1); + if (Value->isUndef()) + return Vector; + + SDValue Index = Op->getOperand(2); + + SDValue BFI = + DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, + {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector, + DAG.getNode(ISD::MUL, DL, MVT::i32, + DAG.getZExtOrTrunc(Index, DL, MVT::i32), + DAG.getConstant(8, DL, MVT::i32)), + DAG.getConstant(8, DL, MVT::i32)}); + return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI); +} + +SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + return SDValue(); +} /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift /// amount, or @@ -2476,6 +2558,10 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return Op; case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::STORE: @@ -4987,6 +5073,32 @@ static SDValue PerformANDCombine(SDNode *N, } SDValue AExt; + + // Convert BFE-> truncate i16 -> and 255 + // To just BFE-> truncate i16, as the value already has all the bits in the + // right places. + if (Val.getOpcode() == ISD::TRUNCATE) { + SDValue BFE = Val.getOperand(0); + if (BFE.getOpcode() != NVPTXISD::BFE) + return SDValue(); + + ConstantSDNode *BFEBits = dyn_cast(BFE.getOperand(0)); + if (!BFEBits) + return SDValue(); + uint64_t BFEBitsVal = BFEBits->getZExtValue(); + + ConstantSDNode *MaskCnst = dyn_cast(Mask); + if (!MaskCnst) { + // Not an AND with a constant + return SDValue(); + } + uint64_t MaskVal = MaskCnst->getZExtValue(); + + if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1) + return SDValue(); + // If we get here, the AND is unnecessary. 
Just replace it with the trunc
+    DCI.CombineTo(N, Val, false);
+  }
 
   // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
   if (Val.getOpcode() == ISD::ANY_EXTEND) {
     AExt = Val;
@@ -5266,6 +5378,7 @@ static SDValue PerformSETCCCombine(SDNode *N,
 static SDValue PerformEXTRACTCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
   SDValue Vector = N->getOperand(0);
+  SDLoc DL(N);
   EVT VectorVT = Vector.getValueType();
   if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() &&
       IsPTXVectorType(VectorVT.getSimpleVT()))
@@ -5286,7 +5399,6 @@ static SDValue PerformEXTRACTCombine(SDNode *N,
   if (!Index || Index->getZExtValue() == 0)
     return SDValue();
 
-  SDLoc DL(N);
   MVT IVT = MVT::getIntegerVT(VectorBits);
   EVT EltVT = VectorVT.getVectorElementType();
 
@@ -5309,6 +5421,38 @@ static SDValue PerformEXTRACTCombine(SDNode *N,
   return Result;
 }
 
+static SDValue PerformVSELECTCombine(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue VA = N->getOperand(1);
+  EVT VectorVT = VA.getValueType();
+  if (VectorVT != MVT::v4i8)
+    return SDValue();
+
+  // We need to split vselect into individual per-element operations. Because
+  // we use BFE/BFI instructions for byte extraction/insertion, we end up with
+  // 32-bit values, so we may as well do the comparison as i32 to avoid
+  // conversions to/from i16 normally used for i8 values.
+  SmallVector<SDValue, 4> E;
+  SDLoc DL(N);
+  SDValue VCond = N->getOperand(0);
+  SDValue VB = N->getOperand(2);
+  for (int I = 0; I < 4; ++I) {
+    SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond,
+                                DCI.DAG.getConstant(I, DL, MVT::i32));
+    SDValue EA = DCI.DAG.getAnyExtOrTrunc(
+        DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA,
+                        DCI.DAG.getConstant(I, DL, MVT::i32)),
+        DL, MVT::i32);
+    SDValue EB = DCI.DAG.getAnyExtOrTrunc(
+        DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB,
+                        DCI.DAG.getConstant(I, DL, MVT::i32)),
+        DL, MVT::i32);
+    E.push_back(DCI.DAG.getAnyExtOrTrunc(
+        DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8));
+  }
+  return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E);
+}
+
 SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel();
@@ -5334,6 +5478,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
     return PerformStoreRetvalCombine(N);
   case ISD::EXTRACT_VECTOR_ELT:
     return PerformEXTRACTCombine(N, DCI);
+  case ISD::VSELECT:
+    return PerformVSELECTCombine(N, DCI);
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cd1985cc4219bdf..5c7c10965e2f2ca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -57,6 +57,9 @@ enum NodeType : unsigned {
   MUL_WIDE_UNSIGNED,
   IMAD,
   SETP_F16X2,
+  BFE,
+  BFI,
+  PRMT,
   Dummy,
 
   LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -590,6 +593,8 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index
307963aaa800b88..2a34d050ed8f707 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -107,6 +107,21 @@ def VecElement : Operand<i32> { let PrintMethod = "printVecElement"; } +// PRMT modes +// These must match the enum in NVPTX.h +def PrmtNONE : PatLeaf<(i32 0x0)>; +def PrmtF4E : PatLeaf<(i32 0x1)>; +def PrmtB4E : PatLeaf<(i32 0x2)>; +def PrmtRC8 : PatLeaf<(i32 0x3)>; +def PrmtECL : PatLeaf<(i32 0x4)>; +def PrmtECR : PatLeaf<(i32 0x5)>; +def PrmtRC16 : PatLeaf<(i32 0x6)>; + +def PrmtMode : Operand<i32> { + let PrintMethod = "printPrmtMode"; +} + + //===----------------------------------------------------------------------===// // NVPTX Instruction Predicate Definitions //===----------------------------------------------------------------------===// @@ -742,7 +757,7 @@ defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>; // def v2f16imm : Operand<v2f16>; // defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>; -foreach vt = [v2f16, v2bf16, v2i16] in { +foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { def : Pat<(vt (select Int1Regs:$p, (vt Int32Regs:$a), (vt Int32Regs:$b))), (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>; } @@ -1738,46 +1753,119 @@ def FUNSHFRCLAMP : // restriction in PTX? // // dest and src may be int32 or int64, but start and end are always int32. -multiclass BFE<string Instr, RegisterClass RC> { +def SDTBFE : + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>; + +def SDTBFI : + SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def bfi : SDNode<"NVPTXISD::BFI", SDTBFI>; + +def SDTPRMT : + SDTypeProfile<1, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>,]>; +def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>; + +multiclass BFE<string Instr, ValueType T, RegisterClass RC> { def rrr : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c), - !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; + !strconcat(Instr, " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>; def rri : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, i32imm:$c), - !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; + !strconcat(Instr, " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 imm:$c)))]>; def rii : NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c), - !strconcat(Instr, " \t$d, $a, $b, $c;"), []>; + !strconcat(Instr, " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (bfe (T RC:$a), (i32 imm:$b), (i32 imm:$c)))]>; } -multiclass BFI<string Instr, RegisterClass RC> { - def rrr + +multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> { + def rrrr : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), - !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; - def rri + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>; + def rrri : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d), - !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; - def rii + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>; + def rrii : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d), - !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), []>; + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; + def irrr + : NVPTXInst<(outs RC:$f), + (ins ImmCls:$a, RC:$b,
Int32Regs:$c, Int32Regs:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>; + def irri + : NVPTXInst<(outs RC:$f), + (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>; + def irii + : NVPTXInst<(outs RC:$f), + (ins ImmCls:$a, RC:$b, i32imm:$c, i32imm:$d), + !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), + [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; +} + +multiclass PRMT<ValueType T, RegisterClass RC> { + def rrr + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, Int32Regs:$c, i32imm:$mode), + !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>; + def rri + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, i32imm:$c, i32imm:$mode), + !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>; + def rii + : NVPTXInst<(outs RC:$d), + (ins RC:$a, i32imm:$b, i32imm:$c, i32imm:$mode), + !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), + [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>; } let hasSideEffects = false in { - defm BFE_S32 : BFE<"bfe.s32", Int32Regs>; - defm BFE_U32 : BFE<"bfe.u32", Int32Regs>; - defm BFE_S64 : BFE<"bfe.s64", Int64Regs>; - defm BFE_U64 : BFE<"bfe.u64", Int64Regs>; + defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>; + defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>; + defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>; + defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>; - defm BFI_B32 : BFI<"bfi.b32", Int32Regs>; - defm BFI_B64 : BFI<"bfi.b64", Int64Regs>; + defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>; + defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>; + + defm PRMT_B32 : PRMT<i32, Int32Regs>; } -// Common byte extraction patterns + +// Byte extraction + signed/unsigned extension to i32.
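+// (A signed bfe.s32 already leaves the extracted byte sign-extended in the
+// 32-bit register, so a following sext_inreg folds away; masking the same bfe
+// with 255 keeps only the low byte and maps to the zero-extending bfe.u32
+// form instead. That is what the patterns below encode.)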
+def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), i8)), + (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>; +def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), i8)), + (BFE_S32rii Int32Regs:$s, imm:$o, 8)>; +def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), 255)), + (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>; +def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), 255)), + (BFE_U32rii Int32Regs:$s, imm:$o, 8)>; + +// Byte extraction + signed extension to i16 +def : Pat<(i16 (sext_inreg (trunc (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8)), i8)), + (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>; + + +// Byte extraction via shift/trunc/sext def : Pat<(i16 (sext_inreg (trunc Int32Regs:$s), i8)), (CVT_s8_s32 Int32Regs:$s, CvtNONE)>; def : Pat<(i16 (sext_inreg (trunc (srl (i32 Int32Regs:$s), (i32 imm:$o))), i8)), @@ -1786,7 +1874,6 @@ def : Pat<(sext_inreg (srl (i32 Int32Regs:$s), (i32 imm:$o)), i8), (BFE_S32rii Int32Regs:$s, imm:$o, 8)>; def : Pat<(i16 (sra (i16 (trunc Int32Regs:$s)), (i32 8))), (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>; - def : Pat<(sext_inreg (srl (i64 Int64Regs:$s), (i32 imm:$o)), i8), (BFE_S64rii Int64Regs:$s, imm:$o, 8)>; def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), @@ -1794,24 +1881,6 @@ def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)), (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>; -def ExtractFromV4I8 : ComplexPattern<i16, 2, "SelectExtractEltFromV4I8">; -def: Pat<(i32 (sext_inreg (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), i8)), - (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8)>; -def: Pat<(i32 (and (i32 (anyext (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)))), 255)), - (BFE_U32rii Int32Regs:$src, imm:$bitidx, 8)>; -def: Pat<(i16 (sext_inreg (ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), i8)), - (CVT_s8_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; -def: Pat<(ExtractFromV4I8 (v4i8 Int32Regs:$src), (i32 imm:$bitidx)), - (CVT_s16_s32 (BFE_S32rii Int32Regs:$src, imm:$bitidx, 8), CvtNONE)>; - - -def : Pat<(v4i8 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b), - (i16 Int16Regs:$c), (i16 Int16Regs:$d))), - (BFI_B32rii - (BFI_B32rii (CVT_u32_u16 Int16Regs:$d, CvtNONE), (CVT_u32_u16 Int16Regs:$c, CvtNONE), 8, 8), - (BFI_B32rii (CVT_u32_u16 Int16Regs:$b, CvtNONE), (CVT_u32_u16 Int16Regs:$a, CvtNONE), 8, 8), - 16, 16)>; - //----------------------------------- // Comparison instructions (setp, set) //----------------------------------- @@ -2141,6 +2210,29 @@ def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +// Comparisons of i8 extracted with BFE as i32 +def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>; +def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>; +def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>; +def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)), + (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>; + +def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a,
Int32Regs:$b, CmpGTU)>; +def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpGEU)>; +def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLTU)>; +def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLEU)>; +def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>; +def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>; + // i1 compare -> i32 def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll new file mode 100644 index 000000000000000..3b13ac02a7b923b --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -0,0 +1,1237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; ## Support i16x2 instructions +; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \ +; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ +; RUN: | FileCheck -allow-deprecated-dag-overlap %s +; RUN: %if ptxas %{ \ +; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 \ +; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ +; RUN: | %ptxas-verify -arch=sm_90 \ +; RUN: %} + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +define <4 x i8> @test_ret_const() #0 { +; CHECK-LABEL: test_ret_const( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u32 %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + ret <4 x i8> +} + +define i8 @test_extract_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_0_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i32 0 + ret i8 %e +} + +define i8 @test_extract_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_1_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 8, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i32 1 + ret i8 %e +} + +define i8 @test_extract_2(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_2_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 16, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i32 2 + ret i8 %e +} + +define i8 @test_extract_3(<4 x i8> %a) #0 { +; CHECK-LABEL: test_extract_3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_3_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <4 x 
i8> %a, i32 3 + ret i8 %e +} + +define i8 @test_extract_i(<4 x i8> %a, i64 %idx) #0 { +; CHECK-LABEL: test_extract_i( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test_extract_i_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_extract_i_param_0]; +; CHECK-NEXT: cvt.u32.u64 %r2, %rd1; +; CHECK-NEXT: shl.b32 %r3, %r2, 3; +; CHECK-NEXT: bfe.u32 %r4, %r1, %r3, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: ret; + %e = extractelement <4 x i8> %a, i64 %idx + ret i8 %e +} + +define <4 x i8> @test_add(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_add( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_add_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_add_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; +; CHECK-NEXT: add.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: add.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r11; +; CHECK-NEXT: add.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs11, %r15; +; CHECK-NEXT: add.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %r = add <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_add_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_add_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<9>; +; CHECK-NEXT: .reg .b32 %r<14>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_add_imm_0_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; +; CHECK-NEXT: add.s16 %rs2, %rs1, 1; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; +; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; +; CHECK-NEXT: add.s16 %rs4, %rs3, 2; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; +; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: add.s16 %rs6, %rs5, 3; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: add.s16 %rs8, %rs7, 4; +; CHECK-NEXT: cvt.u32.u16 %r11, %rs8; +; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12; +; CHECK-NEXT: ret; + %r = add <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_add_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_add_imm_1( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<9>; +; CHECK-NEXT: .reg .b32 %r<14>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_add_imm_1_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 
0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; +; CHECK-NEXT: add.s16 %rs2, %rs1, 1; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs2; +; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; +; CHECK-NEXT: add.s16 %rs4, %rs3, 2; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs4; +; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: add.s16 %rs6, %rs5, 3; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: add.s16 %rs8, %rs7, 4; +; CHECK-NEXT: cvt.u32.u16 %r11, %rs8; +; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12; +; CHECK-NEXT: ret; + %r = add <4 x i8> %a, + ret <4 x i8> %r +} + +define <4 x i8> @test_sub(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_sub( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_sub_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_sub_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; +; CHECK-NEXT: sub.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: sub.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r11; +; CHECK-NEXT: sub.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs11, %r15; +; CHECK-NEXT: sub.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %r = sub <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_smax( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_smax_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_smax_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.gt.s32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.gt.s32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.gt.s32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.gt.s32 %p4, %r9, %r10; +; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: 
ret; + %cmp = icmp sgt <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_umax( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_umax_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_umax_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.gtu.u32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.gtu.u32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.gtu.u32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.gtu.u32 %p4, %r9, %r10; +; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %cmp = icmp ugt <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_smin( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_smin_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_smin_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.le.s32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.le.s32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.le.s32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.le.s32 %p4, %r9, %r10; +; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %cmp = icmp sle <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_umin( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_umin_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_umin_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: setp.leu.u32 %p1, %r3, %r4; +; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: setp.leu.u32 %p2, %r5, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: setp.leu.u32 %p3, %r7, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: setp.leu.u32 %p4, %r9, %r10; +; CHECK-NEXT: 
selp.b32 %r11, %r9, %r10, %p4; +; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.b32 %r14, %r5, %r6, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.b32 %r16, %r3, %r4, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %cmp = icmp ule <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_eq(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 { +; CHECK-LABEL: test_eq( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<24>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_eq_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_eq_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_eq_param_0]; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r5, %r1, 24, 8; +; CHECK-NEXT: setp.eq.u32 %p1, %r5, %r4; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: setp.eq.u32 %p2, %r7, %r6; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: setp.eq.u32 %p3, %r9, %r8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r11, %r1, 0, 8; +; CHECK-NEXT: setp.eq.u32 %p4, %r11, %r10; +; CHECK-NEXT: bfe.s32 %r12, %r3, 0, 8; +; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4; +; CHECK-NEXT: bfe.s32 %r14, %r3, 8, 8; +; CHECK-NEXT: selp.b32 %r15, %r9, %r14, %p3; +; CHECK-NEXT: bfi.b32 %r16, %r15, %r13, 8, 8; +; CHECK-NEXT: bfe.s32 %r17, %r3, 16, 8; +; CHECK-NEXT: selp.b32 %r18, %r7, %r17, %p2; +; CHECK-NEXT: bfi.b32 %r19, %r18, %r16, 16, 8; +; CHECK-NEXT: bfe.s32 %r20, %r3, 24, 8; +; CHECK-NEXT: selp.b32 %r21, %r5, %r20, %p1; +; CHECK-NEXT: bfi.b32 %r22, %r21, %r19, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r22; +; CHECK-NEXT: ret; + %cmp = icmp eq <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %c + ret <4 x i8> %r +} + +define <4 x i8> @test_ne(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 { +; CHECK-LABEL: test_ne( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<24>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_ne_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_ne_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_ne_param_0]; +; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r5, %r1, 24, 8; +; CHECK-NEXT: setp.ne.u32 %p1, %r5, %r4; +; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 16, 8; +; CHECK-NEXT: setp.ne.u32 %p2, %r7, %r6; +; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: setp.ne.u32 %p3, %r9, %r8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r11, %r1, 0, 8; +; CHECK-NEXT: setp.ne.u32 %p4, %r11, %r10; +; CHECK-NEXT: bfe.s32 %r12, %r3, 0, 8; +; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4; +; CHECK-NEXT: bfe.s32 %r14, %r3, 8, 8; +; CHECK-NEXT: selp.b32 %r15, %r9, %r14, %p3; +; CHECK-NEXT: bfi.b32 %r16, %r15, %r13, 8, 8; +; CHECK-NEXT: bfe.s32 %r17, %r3, 16, 8; +; CHECK-NEXT: selp.b32 %r18, %r7, %r17, %p2; +; CHECK-NEXT: bfi.b32 %r19, %r18, %r16, 16, 8; +; CHECK-NEXT: bfe.s32 %r20, %r3, 24, 8; +; CHECK-NEXT: selp.b32 %r21, %r5, %r20, %p1; +; CHECK-NEXT: bfi.b32 %r22, %r21, %r19, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r22; +; CHECK-NEXT: ret; + %cmp = icmp ne <4 x i8> %a, %b + %r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %c + ret <4 x i8> %r +} + 
+define <4 x i8> @test_mul(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_mul( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_mul_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_mul_param_0]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; +; CHECK-NEXT: mul.lo.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; +; CHECK-NEXT: mul.lo.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r11; +; CHECK-NEXT: mul.lo.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.u16.u32 %rs11, %r15; +; CHECK-NEXT: mul.lo.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17; +; CHECK-NEXT: ret; + %r = mul <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_or(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_or( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_or_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [test_or_param_0]; +; CHECK-NEXT: or.b32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: ret; + %r = or <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_or_computed(i8 %a) { +; CHECK-LABEL: test_or_computed( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_or_computed_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 0, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 0, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8; +; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8; +; CHECK-NEXT: or.b32 %r8, %r6, %r4; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %r = or <4 x i8> %ins.1, %ins.0 + ret <4 x i8> %r +} + +define <4 x i8> @test_or_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_or_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_or_imm_0_param_0]; +; CHECK-NEXT: or.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = or <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_or_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_or_imm_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_or_imm_1_param_0]; +; CHECK-NEXT: or.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = or <4 x i8> %a, + ret <4 x i8> %r +} + +define <4 x i8> @test_xor(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: 
test_xor( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_xor_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [test_xor_param_0]; +; CHECK-NEXT: xor.b32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: ret; + %r = xor <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_xor_computed(i8 %a) { +; CHECK-LABEL: test_xor_computed( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_xor_computed_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 0, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 0, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8; +; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8; +; CHECK-NEXT: xor.b32 %r8, %r6, %r4; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %r = xor <4 x i8> %ins.1, %ins.0 + ret <4 x i8> %r +} + +define <4 x i8> @test_xor_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_xor_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_xor_imm_0_param_0]; +; CHECK-NEXT: xor.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = xor <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_xor_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_xor_imm_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_xor_imm_1_param_0]; +; CHECK-NEXT: xor.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = xor <4 x i8> %a, + ret <4 x i8> %r +} + +define <4 x i8> @test_and(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_and( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r3, [test_and_param_1]; +; CHECK-NEXT: ld.param.u32 %r4, [test_and_param_0]; +; CHECK-NEXT: and.b32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: ret; + %r = and <4 x i8> %a, %b + ret <4 x i8> %r +} + +define <4 x i8> @test_and_computed(i8 %a) { +; CHECK-LABEL: test_and_computed( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_and_computed_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 0, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 0, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8; +; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8; +; CHECK-NEXT: and.b32 %r8, %r6, %r4; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %r = and <4 x i8> %ins.1, %ins.0 + ret <4 x i8> %r +} + +define <4 x i8> @test_and_imm_0(<4 x i8> %a) #0 { +; CHECK-LABEL: test_and_imm_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_and_imm_0_param_0]; +; CHECK-NEXT: and.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = and <4 x i8> , %a + ret <4 x i8> %r +} + +define <4 x i8> @test_and_imm_1(<4 x i8> %a) #0 { +; CHECK-LABEL: test_and_imm_1( +; 
CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_and_imm_1_param_0]; +; CHECK-NEXT: and.b32 %r2, %r1, 67305985; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = and <4 x i8> %a, + ret <4 x i8> %r +} + +define void @test_ldst_v2i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v2i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v2i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v2i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: st.u32 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <4 x i8>, ptr %a + store <4 x i8> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v3i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v3i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v3i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v3i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: st.u16 [%rd2], %r1; +; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; +; CHECK-NEXT: st.u8 [%rd2+2], %r3; +; CHECK-NEXT: ret; + %t1 = load <3 x i8>, ptr %a + store <3 x i8> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v4i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: st.u32 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <4 x i8>, ptr %a + store <4 x i8> %t1, ptr %b, align 16 + ret void +} + +define void @test_ldst_v8i8(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v8i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v8i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v8i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd1+4]; +; CHECK-NEXT: st.u32 [%rd2+4], %r2; +; CHECK-NEXT: st.u32 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <8 x i8>, ptr %a + store <8 x i8> %t1, ptr %b, align 16 + ret void +} + +declare <4 x i8> @test_callee(<4 x i8> %a, <4 x i8> %b) #0 + +define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_call( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_call_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_call_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .reg .b32 temp_param_reg; +; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: st.param.b32 [param0+0], %r1; +; CHECK-NEXT: .param .align 4 .b8 param1[4]; +; CHECK-NEXT: st.param.b32 [param1+0], %r2; +; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: test_callee, +; CHECK-NEXT: ( +; CHECK-NEXT: param0, +; CHECK-NEXT: param1 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.b32 %r3, [retval0+0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = call <4 x i8> @test_callee(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %r +} + +define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_call_flipped( +; CHECK: { +; 
CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_call_flipped_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_call_flipped_param_0]; +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .reg .b32 temp_param_reg; +; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: st.param.b32 [param0+0], %r2; +; CHECK-NEXT: .param .align 4 .b8 param1[4]; +; CHECK-NEXT: st.param.b32 [param1+0], %r1; +; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: test_callee, +; CHECK-NEXT: ( +; CHECK-NEXT: param0, +; CHECK-NEXT: param1 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.b32 %r3, [retval0+0]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = call <4 x i8> @test_callee(<4 x i8> %b, <4 x i8> %a) + ret <4 x i8> %r +} + +define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_tailcall_flipped( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_tailcall_flipped_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_tailcall_flipped_param_0]; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .reg .b32 temp_param_reg; +; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: st.param.b32 [param0+0], %r2; +; CHECK-NEXT: .param .align 4 .b8 param1[4]; +; CHECK-NEXT: st.param.b32 [param1+0], %r1; +; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: test_callee, +; CHECK-NEXT: ( +; CHECK-NEXT: param0, +; CHECK-NEXT: param1 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.b32 %r3, [retval0+0]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = tail call <4 x i8> @test_callee(<4 x i8> %b, <4 x i8> %a) + ret <4 x i8> %r +} + +define <4 x i8> @test_select(<4 x i8> %a, <4 x i8> %b, i1 zeroext %c) #0 { +; CHECK-LABEL: test_select( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2]; +; CHECK-NEXT: and.b16 %rs2, %rs1, 1; +; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1; +; CHECK-NEXT: ld.param.u32 %r2, [test_select_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_select_param_0]; +; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %r = select i1 %c, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i8> @test_select_cc(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) #0 { +; CHECK-LABEL: test_select_cc( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<29>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r4, [test_select_cc_param_3]; +; CHECK-NEXT: ld.param.u32 %r3, [test_select_cc_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_select_cc_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_select_cc_param_0]; +; CHECK-NEXT: bfe.s32 %r5, %r4, 24, 8; +; CHECK-NEXT: bfe.s32 %r6, %r3, 24, 8; +; CHECK-NEXT: setp.ne.u32 %p1, %r6, %r5; +; CHECK-NEXT: bfe.s32 %r7, %r4, 16, 8; +; CHECK-NEXT: bfe.s32 %r8, %r3, 16, 8; +; CHECK-NEXT: setp.ne.u32 %p2, %r8, %r7; +; CHECK-NEXT: bfe.s32 %r9, %r4, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r3, 8, 8; +; CHECK-NEXT: setp.ne.u32 %p3, %r10, %r9; +; CHECK-NEXT: bfe.s32 %r11, %r4, 0, 8; +; CHECK-NEXT: bfe.s32 %r12, %r3, 0, 8; +; CHECK-NEXT: setp.ne.u32 %p4, %r12, %r11; +; CHECK-NEXT: bfe.s32 
%r13, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r14, %r1, 0, 8; +; CHECK-NEXT: selp.b32 %r15, %r14, %r13, %p4; +; CHECK-NEXT: bfe.s32 %r16, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r17, %r1, 8, 8; +; CHECK-NEXT: selp.b32 %r18, %r17, %r16, %p3; +; CHECK-NEXT: bfi.b32 %r19, %r18, %r15, 8, 8; +; CHECK-NEXT: bfe.s32 %r20, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r21, %r1, 16, 8; +; CHECK-NEXT: selp.b32 %r22, %r21, %r20, %p2; +; CHECK-NEXT: bfi.b32 %r23, %r22, %r19, 16, 8; +; CHECK-NEXT: bfe.s32 %r24, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r25, %r1, 24, 8; +; CHECK-NEXT: selp.b32 %r26, %r25, %r24, %p1; +; CHECK-NEXT: bfi.b32 %r27, %r26, %r23, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r27; +; CHECK-NEXT: ret; + %cc = icmp ne <4 x i8> %c, %d + %r = select <4 x i1> %cc, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + +define <4 x i32> @test_select_cc_i32_i8(<4 x i32> %a, <4 x i32> %b, +; CHECK-LABEL: test_select_cc_i32_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<23>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [test_select_cc_i32_i8_param_1]; +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [test_select_cc_i32_i8_param_0]; +; CHECK-NEXT: ld.param.u32 %r10, [test_select_cc_i32_i8_param_3]; +; CHECK-NEXT: ld.param.u32 %r9, [test_select_cc_i32_i8_param_2]; +; CHECK-NEXT: bfe.s32 %r11, %r10, 0, 8; +; CHECK-NEXT: bfe.s32 %r12, %r9, 0, 8; +; CHECK-NEXT: setp.ne.u32 %p1, %r12, %r11; +; CHECK-NEXT: bfe.s32 %r13, %r10, 8, 8; +; CHECK-NEXT: bfe.s32 %r14, %r9, 8, 8; +; CHECK-NEXT: setp.ne.u32 %p2, %r14, %r13; +; CHECK-NEXT: bfe.s32 %r15, %r10, 16, 8; +; CHECK-NEXT: bfe.s32 %r16, %r9, 16, 8; +; CHECK-NEXT: setp.ne.u32 %p3, %r16, %r15; +; CHECK-NEXT: bfe.s32 %r17, %r10, 24, 8; +; CHECK-NEXT: bfe.s32 %r18, %r9, 24, 8; +; CHECK-NEXT: setp.ne.u32 %p4, %r18, %r17; +; CHECK-NEXT: selp.b32 %r19, %r4, %r8, %p4; +; CHECK-NEXT: selp.b32 %r20, %r3, %r7, %p3; +; CHECK-NEXT: selp.b32 %r21, %r2, %r6, %p2; +; CHECK-NEXT: selp.b32 %r22, %r1, %r5, %p1; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r22, %r21, %r20, %r19}; +; CHECK-NEXT: ret; + <4 x i8> %c, <4 x i8> %d) #0 { + %cc = icmp ne <4 x i8> %c, %d + %r = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %r +} + +define <4 x i8> @test_select_cc_i8_i32(<4 x i8> %a, <4 x i8> %b, +; CHECK-LABEL: test_select_cc_i8_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<27>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_select_cc_i8_i32_param_3]; +; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_select_cc_i8_i32_param_2]; +; CHECK-NEXT: ld.param.u32 %r2, [test_select_cc_i8_i32_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_select_cc_i8_i32_param_0]; +; CHECK-NEXT: setp.ne.s32 %p1, %r6, %r10; +; CHECK-NEXT: setp.ne.s32 %p2, %r5, %r9; +; CHECK-NEXT: setp.ne.s32 %p3, %r4, %r8; +; CHECK-NEXT: setp.ne.s32 %p4, %r3, %r7; +; CHECK-NEXT: bfe.s32 %r11, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r12, %r1, 0, 8; +; CHECK-NEXT: selp.b32 %r13, %r12, %r11, %p4; +; CHECK-NEXT: bfe.s32 %r14, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r15, %r1, 8, 8; +; CHECK-NEXT: selp.b32 %r16, %r15, %r14, %p3; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 8, 8; +; CHECK-NEXT: bfe.s32 %r18, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r19, %r1, 16, 8; +; CHECK-NEXT: selp.b32 %r20, %r19, %r18, %p2; +; CHECK-NEXT: bfi.b32 %r21, %r20, %r17, 16, 8; +; CHECK-NEXT: bfe.s32 %r22, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r23, %r1, 24, 8; +; CHECK-NEXT: selp.b32 %r24, 
%r23, %r22, %p1; +; CHECK-NEXT: bfi.b32 %r25, %r24, %r21, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r25; +; CHECK-NEXT: ret; + <4 x i32> %c, <4 x i32> %d) #0 { + %cc = icmp ne <4 x i32> %c, %d + %r = select <4 x i1> %cc, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %r +} + + +define <4 x i8> @test_trunc_2xi32(<4 x i32> %a) #0 { +; CHECK-LABEL: test_trunc_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [test_trunc_2xi32_param_0]; +; CHECK-NEXT: bfi.b32 %r5, %r2, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r6, %r3, %r5, 16, 8; +; CHECK-NEXT: bfi.b32 %r7, %r4, %r6, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7; +; CHECK-NEXT: ret; + %r = trunc <4 x i32> %a to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i8> @test_trunc_2xi64(<4 x i64> %a) #0 { +; CHECK-LABEL: test_trunc_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [test_trunc_2xi64_param_0+16]; +; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [test_trunc_2xi64_param_0]; +; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; +; CHECK-NEXT: cvt.u32.u64 %r2, %rd2; +; CHECK-NEXT: bfi.b32 %r3, %r2, %r1, 8, 8; +; CHECK-NEXT: cvt.u32.u64 %r4, %rd3; +; CHECK-NEXT: bfi.b32 %r5, %r4, %r3, 16, 8; +; CHECK-NEXT: cvt.u32.u64 %r6, %rd4; +; CHECK-NEXT: bfi.b32 %r7, %r6, %r5, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7; +; CHECK-NEXT: ret; + %r = trunc <4 x i64> %a to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i32> @test_zext_2xi32(<4 x i8> %a) #0 { +; CHECK-LABEL: test_zext_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_zext_2xi32_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8; +; CHECK-NEXT: bfe.u32 %r3, %r1, 16, 8; +; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8; +; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r5, %r4, %r3, %r2}; +; CHECK-NEXT: ret; + %r = zext <4 x i8> %a to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i64> @test_zext_2xi64(<4 x i8> %a) #0 { +; CHECK-LABEL: test_zext_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_zext_2xi64_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 24, 8; +; CHECK-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NEXT: and.b64 %rd2, %rd1, 255; +; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; +; CHECK-NEXT: cvt.u64.u32 %rd3, %r3; +; CHECK-NEXT: and.b64 %rd4, %rd3, 255; +; CHECK-NEXT: bfe.s32 %r4, %r1, 8, 8; +; CHECK-NEXT: cvt.u64.u32 %rd5, %r4; +; CHECK-NEXT: and.b64 %rd6, %rd5, 255; +; CHECK-NEXT: bfe.s32 %r5, %r1, 0, 8; +; CHECK-NEXT: cvt.u64.u32 %rd7, %r5; +; CHECK-NEXT: and.b64 %rd8, %rd7, 255; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd8, %rd6}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd4, %rd2}; +; CHECK-NEXT: ret; + %r = zext <4 x i8> %a to <4 x i64> + ret <4 x i64> %r +} + +define <4 x i8> @test_bitcast_i32_to_2xi8(i32 %a) #0 { +; CHECK-LABEL: test_bitcast_i32_to_2xi8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_bitcast_i32_to_2xi8_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + %r = bitcast i32 %a to <4 x i8> + ret <4 x i8> %r +} + +define i32 @test_bitcast_2xi8_to_i32(<4 x i8> %a) #0 { +; CHECK-LABEL: test_bitcast_2xi8_to_i32( +; CHECK: { +; 
CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_bitcast_2xi8_to_i32_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: ret; + %r = bitcast <4 x i8> %a to i32 + ret i32 %r +} + +define <2 x half> @test_bitcast_2xi8_to_2xhalf(i8 %a) #0 { +; CHECK-LABEL: test_bitcast_2xi8_to_2xhalf( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_bitcast_2xi8_to_2xhalf_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: bfi.b32 %r2, 5, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r3, 6, %r2, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, 7, %r3, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: ret; + %ins.0 = insertelement <4 x i8> undef, i8 %a, i32 0 + %ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1 + %ins.2 = insertelement <4 x i8> %ins.1, i8 6, i32 2 + %ins.3 = insertelement <4 x i8> %ins.2, i8 7, i32 3 + %r = bitcast <4 x i8> %ins.3 to <2 x half> + ret <2 x half> %r +} + + +define <4 x i8> @test_shufflevector(<4 x i8> %a) #0 { +; CHECK-LABEL: test_shufflevector( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_param_0]; +; CHECK-NEXT: bfe.s32 %r2, %r1, 24, 8; +; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; +; CHECK-NEXT: bfi.b32 %r4, %r3, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r5, %r1, 8, 8; +; CHECK-NEXT: bfi.b32 %r6, %r5, %r4, 16, 8; +; CHECK-NEXT: bfe.s32 %r7, %r1, 0, 8; +; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: ret; + %s = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> + ret <4 x i8> %s +} + +define <4 x i8> @test_insertelement(<4 x i8> %a, i8 %x) #0 { +; CHECK-LABEL: test_insertelement( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [test_insertelement_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_insertelement_param_0]; +; CHECK-NEXT: cvt.u32.u16 %r2, %rs1; +; CHECK-NEXT: bfi.b32 %r3, %r2, %r1, 8, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %i = insertelement <4 x i8> %a, i8 %x, i64 1 + ret <4 x i8> %i +} + +define <4 x i8> @test_fptosi_2xhalf_to_2xi8(<4 x half> %a) #0 { +; CHECK-LABEL: test_fptosi_2xhalf_to_2xi8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u32 {%r3, %r4}, [test_fptosi_2xhalf_to_2xi8_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs3, %rs2; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs4, %rs1; +; CHECK-NEXT: mov.b32 %r5, {%rs4, %rs3}; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5; +; CHECK-NEXT: cvt.u32.u16 %r6, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs6; +; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 8, 8; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs9, %rs8; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs10, %rs7; +; CHECK-NEXT: mov.b32 %r9, {%rs10, %rs9}; +; CHECK-NEXT: mov.b32 {%rs11, %rs12}, %r9; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs11; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs12; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13; +; CHECK-NEXT: ret; + %r = fptosi <4 x half> %a to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i8> @test_fptoui_2xhalf_to_2xi8(<4 x 
half> %a) #0 { +; CHECK-LABEL: test_fptoui_2xhalf_to_2xi8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u32 {%r3, %r4}, [test_fptoui_2xhalf_to_2xi8_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs3, %rs2; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs4, %rs1; +; CHECK-NEXT: mov.b32 %r5, {%rs4, %rs3}; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5; +; CHECK-NEXT: cvt.u32.u16 %r6, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs6; +; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 8, 8; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs9, %rs8; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs10, %rs7; +; CHECK-NEXT: mov.b32 %r9, {%rs10, %rs9}; +; CHECK-NEXT: mov.b32 {%rs11, %rs12}, %r9; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs11; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs12; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 24, 8; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13; +; CHECK-NEXT: ret; + %r = fptoui <4 x half> %a to <4 x i8> + ret <4 x i8> %r +} + +attributes #0 = { nounwind } >From 655c6d5bef8f016335643ad75465d22e216168e0 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 5 Oct 2023 14:22:16 -0700 Subject: [PATCH 4/9] Added vector_shuffle lowering to PRMT. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 17 +++++++++++- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 6 ++-- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 29 ++++++++++++++------ 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b886b6e2ce5ddde..701d9912150d955 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2306,7 +2306,22 @@ SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { - return SDValue(); + SDValue V1 = Op.getOperand(0); + EVT VectorVT = V1.getValueType(); + if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8) + return Op; + + // Lower shuffle to PRMT instruction. 
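+ // Each 4-bit nibble I of the PRMT selector picks source byte Mask[I] for
+ // result byte I; e.g. the reversing mask <3,2,1,0> packs to 0x0123 (the 291
+ // checked by the updated test below).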
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); + SDValue V2 = Op.getOperand(1); + uint32_t Selector = 0; + for (auto I: llvm::enumerate(SVN->getMask())) + Selector |= (I.value() << (I.index() * 4)); + + SDLoc DL(Op); + return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2, + DAG.getConstant(Selector, DL, MVT::i32), + DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32)); } /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 2a34d050ed8f707..9d0bcbf3e8f50dc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1822,17 +1822,17 @@ multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> { multiclass PRMT<ValueType T, RegisterClass RC> { def rrr : NVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, Int32Regs:$c, i32imm:$mode), + (ins RC:$a, Int32Regs:$b, Int32Regs:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>; def rri : NVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, i32imm:$c, i32imm:$mode), + (ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>; def rii : NVPTXInst<(outs RC:$d), - (ins RC:$a, i32imm:$b, i32imm:$c, i32imm:$mode), + (ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>; } diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 3b13ac02a7b923b..97e33c2f7eefc26 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1138,23 +1138,34 @@ define <2 x half> @test_bitcast_2xi8_to_2xhalf(i8 %a) #0 { define <4 x i8> @test_shufflevector(<4 x i8> %a) #0 { ; CHECK-LABEL: test_shufflevector( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_param_0]; -; CHECK-NEXT: bfe.s32 %r2, %r1, 24, 8; -; CHECK-NEXT: bfe.s32 %r3, %r1, 16, 8; -; CHECK-NEXT: bfi.b32 %r4, %r3, %r2, 8, 8; -; CHECK-NEXT: bfe.s32 %r5, %r1, 8, 8; -; CHECK-NEXT: bfi.b32 %r6, %r5, %r4, 16, 8; -; CHECK-NEXT: bfe.s32 %r7, %r1, 0, 8; -; CHECK-NEXT: bfi.b32 %r8, %r7, %r6, 24, 8; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: // implicit-def: %r3 +; CHECK-NEXT: prmt.b32 %r2, %r1, %r3, 291; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; ; CHECK-NEXT: ret; %s = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x i8> %s } +define <4 x i8> @test_shufflevector_2(<4 x i8> %a, <4 x i8> %b) #0 { +; CHECK-LABEL: test_shufflevector_2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r2, [test_shufflevector_2_param_1]; +; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_2_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r1, %r2, 9527; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: ret; + %s = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 7, i32 3, i32 5, i32 2> + ret <4 x i8> %s +} + + define <4 x i8> @test_insertelement(<4 x i8> %a, i8 %x) #0 { ; CHECK-LABEL: test_insertelement( ; CHECK: { >From f915e5b855ce969a234cf644413132fe1742fac0 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 5 Oct
2023 14:44:01 -0700 Subject: [PATCH 5/9] Address clang-format complaints. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 701d9912150d955..da78eebb42ed0d9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -521,7 +521,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::BR_CC, VT, Expand); } - // Some SIGN_EXTEND_INREG can be done using cvt instruction. // For others we will expand to a SHL/SRA pair. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); @@ -2206,12 +2205,12 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); SDValue E012 = DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, - DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), E01, - DAG.getConstant(16, DL, MVT::i32), C8); + DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), + E01, DAG.getConstant(16, DL, MVT::i32), C8); SDValue E0123 = DAG.getNode(NVPTXISD::BFI, DL, MVT::i32, - DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), E012, - DAG.getConstant(24, DL, MVT::i32), C8); + DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), + E012, DAG.getConstant(24, DL, MVT::i32), C8); return DAG.getNode(ISD::BITCAST, DL, VT, E0123); } return Op; @@ -5414,7 +5413,6 @@ static SDValue PerformEXTRACTCombine(SDNode *N, if (!Index || Index->getZExtValue() == 0) return SDValue(); - MVT IVT = MVT::getIntegerVT(VectorBits); EVT EltVT = VectorVT.getVectorElementType(); EVT EltIVT = EltVT.changeTypeToInteger(); >From ef3d5dee67581fd9b9644cf1e0ac54514ee4a884 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 5 Oct 2023 15:31:58 -0700 Subject: [PATCH 6/9] Use .lo/ls/hi/hs suffixes for unsigned setp instructions. Removed unused code. 
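[Context note, not part of the patch: on integer operands, PTX spells the signed setp orderings lt/le/gt/ge and gives the unsigned orderings their own names, while the ltu/leu/gtu/geu suffixes are the floating-point "true or unordered" comparison forms. That mismatch is what this change corrects:

  // PTX setp comparison suffixes on integer operands:
  //   signed:   lt, le, gt, ge
  //   unsigned: lo (lower), ls (lower-or-same), hi (higher), hs (higher-or-same)
  // Hence setp.gtu.u32 becomes setp.hi.u32, setp.ltu.u32 becomes setp.lo.u32,
  // and so on in the diff below.]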
--- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 18 ------------------ llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 - llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 12 ++++++++---- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 16 ++++++++-------- 5 files changed, 17 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index f442188610715ee..68391cdb6ff172b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -3570,24 +3570,6 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); } -bool NVPTXDAGToDAGISel::SelectExtractEltFromV4I8(SDValue N, SDValue &V, - SDValue &BitOffset) { - SDValue Vector = N->getOperand(0); - if (!(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Vector->getValueType(0) == MVT::v4i8)) - return false; - - SDLoc DL(N); - V = Vector; - SDValue Index = N->getOperand(1); - if (const ConstantSDNode *IdxConst = dyn_cast(Index)) { - BitOffset = - CurDAG->getTargetConstant(IdxConst->getZExtValue() * 8, DL, MVT::i32); - return true; - } - return false; -} - bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { const Value *Src = nullptr; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 34b5dd449ce086f..06922331f5e2059 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -95,7 +95,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { SDValue &Offset); bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectExtractEltFromV4I8(SDValue N, SDValue &Value, SDValue &Idx); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index da78eebb42ed0d9..8d7a29198d61a11 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2259,7 +2259,7 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, DAG.getZExtOrTrunc(Index, DL, MVT::i32), DAG.getConstant(8, DL, MVT::i32)), DAG.getConstant(8, DL, MVT::i32)}); - return DAG.getZExtOrTrunc(BFE, DL, Op->getValueType(0)); + return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0)); } // Constant index will be matched by tablegen. 
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 9d0bcbf3e8f50dc..3c9d8167e689a56 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -76,6 +76,10 @@ def CmpLT : PatLeaf<(i32 2)>; def CmpLE : PatLeaf<(i32 3)>; def CmpGT : PatLeaf<(i32 4)>; def CmpGE : PatLeaf<(i32 5)>; +def CmpLO : PatLeaf<(i32 6)>; +def CmpLS : PatLeaf<(i32 7)>; +def CmpHI : PatLeaf<(i32 8)>; +def CmpHS : PatLeaf<(i32 9)>; def CmpEQU : PatLeaf<(i32 10)>; def CmpNEU : PatLeaf<(i32 11)>; def CmpLTU : PatLeaf<(i32 12)>; @@ -2221,13 +2225,13 @@ def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32R (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>; def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpGTU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>; def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpGEU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>; def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLTU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>; def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), - (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLEU)>; + (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>; def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>; def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))), diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 97e33c2f7eefc26..641f2f36f95b353 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -293,16 +293,16 @@ define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: ld.param.u32 %r1, [test_umax_param_0]; ; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; ; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; -; CHECK-NEXT: setp.gtu.u32 %p1, %r3, %r4; +; CHECK-NEXT: setp.hi.u32 %p1, %r3, %r4; ; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; ; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; -; CHECK-NEXT: setp.gtu.u32 %p2, %r5, %r6; +; CHECK-NEXT: setp.hi.u32 %p2, %r5, %r6; ; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; ; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; -; CHECK-NEXT: setp.gtu.u32 %p3, %r7, %r8; +; CHECK-NEXT: setp.hi.u32 %p3, %r7, %r8; ; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8; ; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8; -; CHECK-NEXT: setp.gtu.u32 %p4, %r9, %r10; +; CHECK-NEXT: setp.hi.u32 %p4, %r9, %r10; ; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4; ; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3; ; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; @@ -363,16 +363,16 @@ define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: ld.param.u32 %r1, [test_umin_param_0]; ; CHECK-NEXT: bfe.s32 %r3, %r1, 24, 8; ; CHECK-NEXT: bfe.s32 %r4, %r2, 24, 8; -; CHECK-NEXT: setp.leu.u32 %p1, %r3, %r4; +; CHECK-NEXT: setp.ls.u32 %p1, %r3, %r4; ; CHECK-NEXT: bfe.s32 %r5, %r1, 16, 8; ; CHECK-NEXT: bfe.s32 %r6, %r2, 16, 8; -; CHECK-NEXT: setp.leu.u32 %p2, %r5, %r6; +; CHECK-NEXT: setp.ls.u32 %p2, %r5, %r6; ; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; ; CHECK-NEXT: bfe.s32 %r8, %r2, 8, 8; -; CHECK-NEXT: setp.leu.u32 %p3, %r7, %r8; +; CHECK-NEXT: 
setp.ls.u32 %p3, %r7, %r8;
 ; CHECK-NEXT: bfe.s32 %r9, %r1, 0, 8;
 ; CHECK-NEXT: bfe.s32 %r10, %r2, 0, 8;
-; CHECK-NEXT: setp.leu.u32 %p4, %r9, %r10;
+; CHECK-NEXT: setp.ls.u32 %p4, %r9, %r10;
 ; CHECK-NEXT: selp.b32 %r11, %r9, %r10, %p4;
 ; CHECK-NEXT: selp.b32 %r12, %r7, %r8, %p3;
 ; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8;

>From 9821e908e676d8eedbee7c07c90fb5aae4454f82 Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Fri, 6 Oct 2023 12:22:41 -0700
Subject: [PATCH 7/9] Fixed calculation of constant v4i8 values.

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp  | 4 ++++
 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8d7a29198d61a11..6a62e228e8efb39 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2229,6 +2229,10 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
       Value = cast<ConstantSDNode>(Operand)->getAPIntValue();
     else
       llvm_unreachable("Unsupported type");
+    // i8 values are carried around as i16, so we need to zero out upper bits,
+    // so they do not get in the way of combining individual byte values
+    if (VT == MVT::v4i8)
+      Value = Value.trunc(8);
     return Value.zext(32);
   };
   APInt Value;
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index 641f2f36f95b353..c429bf23417f951 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -17,10 +17,10 @@ define <4 x i8> @test_ret_const() #0 {
 ; CHECK-NEXT: .reg .b32 %r<3>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.u32 %r1, 67305985;
+; CHECK-NEXT: mov.u32 %r1, -66911489;
 ; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
 ; CHECK-NEXT: ret;
- ret <4 x i8> <i8 1, i8 2, i8 3, i8 4>
+ ret <4 x i8> <i8 -1, i8 2, i8 3, i8 -4>
 }

 define i8 @test_extract_0(<4 x i8> %a) #0 {

>From 3879bdb03da707fd0fc02e2f92d5c8733a52de1f Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Fri, 6 Oct 2023 14:17:39 -0700
Subject: [PATCH 8/9] Updated a test.
---
 .../CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
index 97b1e38a3388413..8633b09af04873c 100644
--- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
+++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
@@ -89,7 +89,7 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
 ; CHECK-NEXT: ld.param.u32 %r3, [out_v4i8_undef_param_1];
 ; CHECK-NEXT: ld.param.u32 %r4, [out_v4i8_undef_param_0];
 ; CHECK-NEXT: and.b32 %r5, %r4, %r1;
-; CHECK-NEXT: xor.b32 %r7, %r1, -1;
+; CHECK-NEXT: xor.b32 %r7, %r1, -16711681;
 ; CHECK-NEXT: and.b32 %r8, %r3, %r7;
 ; CHECK-NEXT: or.b32 %r9, %r5, %r8;
 ; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9;

>From 899ab5a3ff06431091441ae3d3f5d136db76ab0e Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Fri, 6 Oct 2023 16:21:19 -0700
Subject: [PATCH 9/9] Fixed unaligned load/store of v4i8

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp  |  7 +++---
 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 24 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 6a62e228e8efb39..8e3a80717ba0418 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2694,9 +2694,10 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   if (Op.getValueType() == MVT::i1)
     return LowerLOADi1(Op, DAG);
-  // v2f16/v2bf16/v2i16 are legal, so we can't rely on legalizer to handle
+  // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle
   // unaligned loads and have to handle it here.
-  if (Isv2x16VT(Op.getValueType())) {
+  EVT VT = Op.getValueType();
+  if (Isv2x16VT(VT) || VT == MVT::v4i8) {
     LoadSDNode *Load = cast<LoadSDNode>(Op);
     EVT MemVT = Load->getMemoryVT();
     if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
@@ -2741,7 +2742,7 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // v2f16 is legal, so we can't rely on legalizer to handle unaligned
   // stores and have to handle it here.
- if (Isv2x16VT(VT) && + if ((Isv2x16VT(VT) || VT == MVT::v4i8) && !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), VT, *Store->getMemOperand())) return expandUnalignedStore(Store, DAG); diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index c429bf23417f951..fd48313ad684847 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -757,6 +757,30 @@ define void @test_ldst_v4i8(ptr %a, ptr %b) { ret void } +define void @test_ldst_v4i8_unaligned(ptr %a, ptr %b) { +; CHECK-LABEL: test_ldst_v4i8_unaligned( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v4i8_unaligned_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v4i8_unaligned_param_0]; +; CHECK-NEXT: ld.u8 %r1, [%rd1]; +; CHECK-NEXT: ld.u8 %r2, [%rd1+1]; +; CHECK-NEXT: ld.u8 %r3, [%rd1+2]; +; CHECK-NEXT: ld.u8 %r4, [%rd1+3]; +; CHECK-NEXT: st.u8 [%rd2+3], %r4; +; CHECK-NEXT: st.u8 [%rd2+2], %r3; +; CHECK-NEXT: st.u8 [%rd2+1], %r2; +; CHECK-NEXT: st.u8 [%rd2], %r1; +; CHECK-NEXT: ret; + %t1 = load <4 x i8>, ptr %a, align 1 + store <4 x i8> %t1, ptr %b, align 1 + ret void +} + + define void @test_ldst_v8i8(ptr %a, ptr %b) { ; CHECK-LABEL: test_ldst_v8i8( ; CHECK: { From lldb-commits at lists.llvm.org Fri Oct 6 20:17:36 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 20:17:36 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520cdd0.170a0220.9596a.f3a9@mx.google.com> https://github.com/PortalPete updated https://github.com/llvm/llvm-project/pull/68452 >From 08f4d4ae5afdedb0fbc6196272fcf8316de5f5ea Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Thu, 5 Oct 2023 14:22:35 -1000 Subject: [PATCH] [lldb] Update the `po` alias to print Fix-Its to the console. Modifying `po` alias to match outward FixIt behavior with `expression`. - Fix `po` alias so that it prints out a message when applying a FixIt, just like the `expression` command. - Add test cases for applying a FixIt with both `expression` command and `po` alias. - Reword console messages for readability. 
--- .../Commands/CommandObjectDWIMPrint.cpp | 16 +++++++++-- .../Commands/CommandObjectExpression.cpp | 9 ++++--- .../API/lang/cpp/dwim-print-fixit/Makefile | 3 +++ .../dwim-print-fixit/TestCppDWIMPrintFixIt.py | 27 +++++++++++++++++++ .../API/lang/cpp/dwim-print-fixit/main.cpp | 5 ++++ .../API/lang/cpp/expression-fixit/Makefile | 3 +++ .../TestCppExpressionFixIt.py | 26 ++++++++++++++++++ .../API/lang/cpp/expression-fixit/main.cpp | 5 ++++ 8 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp create mode 100644 lldb/test/API/lang/cpp/expression-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py create mode 100644 lldb/test/API/lang/cpp/expression-fixit/main.cpp diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 7b168eab9e02d44..ea247436323db36 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -172,8 +172,20 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + std::string fixed_expression; + + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). + if (!fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << fixed_expression << "\n"; + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b99133..0c7cebe6a020d87 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,11 +439,12 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). 
if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", - m_fixed_expression.c_str()); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; } if (result_valobj_sp) { diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py new file mode 100644 index 000000000000000..ffd348e16a7e01c --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "dwim-print -O -- class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} diff --git a/lldb/test/API/lang/cpp/expression-fixit/Makefile b/lldb/test/API/lang/cpp/expression-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py new file mode 100644 index 000000000000000..6559860f0ddf17d --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_Expression_FixIt_TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." 
+ + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} From lldb-commits at lists.llvm.org Fri Oct 6 20:21:35 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 20:21:35 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520cebf.050a0220.41cfd.7110@mx.google.com> ================ @@ -43,6 +43,10 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { HandleArgumentCompletion(CompletionRequest &request, OptionElementVector &opt_element_vector) override; +protected: + /// Stores the expression after applying any Fix-Its. + std::string m_fixed_expression; ---------------- PortalPete wrote: Now fixed as a local variable. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 20:24:24 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 20:24:24 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520cf68.170a0220.4620b.fb6a@mx.google.com> PortalPete wrote: > The code is not something I am familiar with, but I did notice that the PR title and the commit title are quite different. Not sure which one is better, but it is generally nice to have them match (in particular it is nice to add the `[lldb]` tag to the commit title to make `git log` nice to browse) Thanks for the tip, @felipepiovezan . All good now. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 20:25:16 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 20:25:16 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520cf9c.170a0220.653b.fe32@mx.google.com> ================ @@ -172,8 +172,18 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; ---------------- PortalPete wrote: > Oh I see other places in the old code do the same... Yup! So is this good to go as it is? 
https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 20:26:45 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 20:26:45 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520cff5.630a0220.f370d.69a6@mx.google.com> https://github.com/PortalPete updated https://github.com/llvm/llvm-project/pull/68452 >From f842928b5300c2ea98dbdc1bf3e3fe768dcf31ed Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Thu, 5 Oct 2023 14:22:35 -1000 Subject: [PATCH] [lldb] Update the `po` alias to print Fix-Its to the console. Modifying `po` alias to match outward FixIt behavior with `expression`. - Fix `po` alias so that it prints out a message when applying a FixIt, just like the `expression` command. - Add test cases for applying a FixIt with both `expression` command and `po` alias. - Reword console messages for readability. --- .../Commands/CommandObjectDWIMPrint.cpp | 16 +++++++++-- .../Commands/CommandObjectExpression.cpp | 9 ++++--- .../API/lang/cpp/dwim-print-fixit/Makefile | 3 +++ .../dwim-print-fixit/TestCppDWIMPrintFixIt.py | 27 +++++++++++++++++++ .../API/lang/cpp/dwim-print-fixit/main.cpp | 5 ++++ .../API/lang/cpp/expression-fixit/Makefile | 3 +++ .../TestCppExpressionFixIt.py | 26 ++++++++++++++++++ .../API/lang/cpp/expression-fixit/main.cpp | 5 ++++ 8 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp create mode 100644 lldb/test/API/lang/cpp/expression-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py create mode 100644 lldb/test/API/lang/cpp/expression-fixit/main.cpp diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 7b168eab9e02d44..ea247436323db36 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -172,8 +172,20 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + std::string fixed_expression; + + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). 
+ if (!fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << fixed_expression << "\n"; + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b99133..0c7cebe6a020d87 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,11 +439,12 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", - m_fixed_expression.c_str()); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; } if (result_valobj_sp) { diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py new file mode 100644 index 000000000000000..e2a16f2787b0b3f --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." 
+ + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "dwim-print -O -- class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} diff --git a/lldb/test/API/lang/cpp/expression-fixit/Makefile b/lldb/test/API/lang/cpp/expression-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py new file mode 100644 index 000000000000000..c60f1e0051d8c8e --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} From lldb-commits at lists.llvm.org Fri Oct 6 20:26:54 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 20:26:54 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <6520cffe.170a0220.48dcb.3f88@mx.google.com> ================ @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPP_DWIM_Fixit_TestCase(TestBase): ---------------- PortalPete wrote: Thanks Dave! `TestCase` it is. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Fri Oct 6 22:25:32 2023 From: lldb-commits at lists.llvm.org (Pete Lawrence via lldb-commits) Date: Fri, 06 Oct 2023 22:25:32 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. 
(PR #68452) In-Reply-To: Message-ID: <6520ebcc.170a0220.2cbce.ff87@mx.google.com> https://github.com/PortalPete updated https://github.com/llvm/llvm-project/pull/68452 >From 6ba636bde2a51704025637f3c35d1b98ba6dd7f3 Mon Sep 17 00:00:00 2001 From: Pete Lawrence Date: Thu, 5 Oct 2023 14:22:35 -1000 Subject: [PATCH] [lldb] Update the `po` alias to print Fix-Its to the console. Modifying `po` alias to match outward FixIt behavior with `expression`. - Fix `po` alias so that it prints out a message when applying a FixIt, just like the `expression` command. - Add test cases for applying a FixIt with both `expression` command and `po` alias. - Reword console messages for readability. --- .../Commands/CommandObjectDWIMPrint.cpp | 16 +++++++++-- .../Commands/CommandObjectExpression.cpp | 9 ++++--- .../API/lang/cpp/dwim-print-fixit/Makefile | 3 +++ .../dwim-print-fixit/TestCppDWIMPrintFixIt.py | 27 +++++++++++++++++++ .../API/lang/cpp/dwim-print-fixit/main.cpp | 5 ++++ .../API/lang/cpp/expression-fixit/Makefile | 3 +++ .../TestCppExpressionFixIt.py | 26 ++++++++++++++++++ .../API/lang/cpp/expression-fixit/main.cpp | 5 ++++ 8 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py create mode 100644 lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp create mode 100644 lldb/test/API/lang/cpp/expression-fixit/Makefile create mode 100644 lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py create mode 100644 lldb/test/API/lang/cpp/expression-fixit/main.cpp diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 7b168eab9e02d44..ea247436323db36 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -172,8 +172,20 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + std::string fixed_expression; + + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). + if (!fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << fixed_expression << "\n"; + } + if (expr_result == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) { StringRef flags; diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index e7e6e3820b99133..0c7cebe6a020d87 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -439,11 +439,12 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, ExpressionResults success = target.EvaluateExpression( expr, frame, result_valobj_sp, eval_options, &m_fixed_expression); - // We only tell you about the FixIt if we applied it. The compiler errors - // will suggest the FixIt if it parsed. + // Only mention Fix-Its if the command applies them. 
+ // The compiler errors can address any parsing issues after applying + // Fix-It(s). if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { - error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n", - m_fixed_expression.c_str()); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; } if (result_valobj_sp) { diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py new file mode 100644 index 000000000000000..e2a16f2787b0b3f --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/TestCppDWIMPrintFixIt.py @@ -0,0 +1,27 @@ +""" +Tests whether the do-what-I-mean (DWIM) print `po` alias applies FixIts like `expr` does +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `po` command (alias) applies a FixIt " \ + "and prints it out to the console, " \ + "just like the `expression` command." + + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "dwim-print -O -- class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/dwim-print-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} diff --git a/lldb/test/API/lang/cpp/expression-fixit/Makefile b/lldb/test/API/lang/cpp/expression-fixit/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py new file mode 100644 index 000000000000000..c60f1e0051d8c8e --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/TestCppExpressionFixIt.py @@ -0,0 +1,26 @@ +""" +Tests whether the expression command applies FixIts +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + def test_with_run_command(self): + "Confirm that the `expression` command applies a FixIt " \ + "and prints it out to the console." 
+ + self.build() + lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.cpp") + ) + + self.expect( + "expr class C { int i; void f() { []() { ++i; }(); } }; 1", + error = True, + substrs=["Applying Fix-It to expression", + "changing it to:", + "class C { int i; void f() { [this]() { ++i; }(); } }"], + ) diff --git a/lldb/test/API/lang/cpp/expression-fixit/main.cpp b/lldb/test/API/lang/cpp/expression-fixit/main.cpp new file mode 100644 index 000000000000000..e9cf11d18a6560d --- /dev/null +++ b/lldb/test/API/lang/cpp/expression-fixit/main.cpp @@ -0,0 +1,5 @@ +int main() { + long foo = 1234; + + return 0; // break here +} From lldb-commits at lists.llvm.org Sun Oct 8 16:35:22 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Sun, 08 Oct 2023 16:35:22 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65233cba.630a0220.b7378.1db3@mx.google.com> ================ @@ -172,8 +172,18 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, { auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + ExpressionResults expr_result = target.EvaluateExpression( + expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying + // Fix-It(s). + if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) { + Stream &error_stream = result.GetErrorStream(); + error_stream << " Applying Fix-It to expression, changing it to:\n"; + error_stream << " " << m_fixed_expression << "\n"; ---------------- adrian-prantl wrote: I think this could be a useful thing to look at unifying in a follow-up patch. https://github.com/llvm/llvm-project/pull/68452 From lldb-commits at lists.llvm.org Sun Oct 8 16:38:46 2023 From: lldb-commits at lists.llvm.org (Adrian Prantl via lldb-commits) Date: Sun, 08 Oct 2023 16:38:46 -0700 (PDT) Subject: [Lldb-commits] [lldb] [lldb] Fix `po` alias by printing fix-its to the console. (PR #68452) In-Reply-To: Message-ID: <65233d86.170a0220.c28bc.2009@mx.google.com> ================ @@ -173,7 +173,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; ExpressionResults expr_result = - target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options); + target.EvaluateExpression(expr, exe_scope, valobj_sp, eval_options, &m_fixed_expression); + + // Only mention Fix-Its if the command applies them. + // The compiler errors can address any parsing issues after applying Fix-It(s). ---------------- adrian-prantl wrote: Ah! The original comment makes more sense: I *think* what it's trying to say is: LLDB only displays fixits if the expression evaluator applied them. Any compiler diagnostics LLDB displays refer to the modified expression with the fixit applied. https://github.com/llvm/llvm-project/pull/68452
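As a rough sketch of what that follow-up unification could look like (a hypothetical helper, not code from this PR; it assumes only the Stream, Target, and GetEnableNotifyAboutFixIts APIs already used in the patch above):

    // Hypothetical shared helper; CommandObjectDWIMPrint::DoExecute and
    // CommandObjectExpression::EvaluateExpression could both call this
    // instead of duplicating the notification logic.
    static void NotifyFixItApplied(Stream &error_stream, Target &target,
                                   llvm::StringRef fixed_expression) {
      // Stay quiet unless a Fix-It was actually applied and the
      // target.notify-about-fixits setting is enabled.
      if (fixed_expression.empty() || !target.GetEnableNotifyAboutFixIts())
        return;
      error_stream << "  Applying Fix-It to expression, changing it to:\n";
      error_stream << "    " << fixed_expression << "\n";
    }

Each call site would then pass its own fixed-expression string, keeping the wording of the message in one place.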