diff --git a/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx b/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
index 4f679532c1c87..9ba5b9816fdbd 100644
--- a/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
+++ b/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
@@ -50,6 +50,11 @@ enum class ENTupleInspectorHist {
    kUncompressedSize
 };
 
+enum class ESchemaProfileFormat {
+   /// https://www.speedscope.app/file-format-schema.json
+   kSpeedscopeJSON
+};
+
 // clang-format off
 /**
 \class ROOT::Experimental::RNTupleInspector
@@ -493,6 +498,11 @@ public:
    {
       PrintFieldTreeAsDot(GetDescriptor().GetFieldZero(), output);
    }
+
+   /////////////////////////////////////////////////////////////////////////////
+   /// \brief Print a string that represents the tree of the (sub)fields and columns of an RNTuple in a format which a
+   /// performance profile visualizer can render
+   void PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output = std::cout) const;
 };
 } // namespace Experimental
 } // namespace ROOT
diff --git a/tree/ntupleutil/src/RNTupleInspector.cxx b/tree/ntupleutil/src/RNTupleInspector.cxx
index 1a5b192f6ad53..24bab6fd53328 100644
--- a/tree/ntupleutil/src/RNTupleInspector.cxx
+++ b/tree/ntupleutil/src/RNTupleInspector.cxx
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -565,3 +566,145 @@ void ROOT::Experimental::RNTupleInspector::PrintFieldTreeAsDot(const ROOT::RFiel
    if (isZeroField)
       output << "}";
 }
+
+namespace {
+
+/// A node of the flattened schema tree (a field or a column) together with the range it spans in the profile.
+struct SpeedscopeFrame {
+   std::string fPrimaryString;         ///< written as the frame's "name"
+   std::string fSecondaryString;       ///< written after "Type: " in the frame's "file" entry
+   std::uint64_t fOpeningPosition = 0; ///< value of "at" in the frame's open ("O") event
+   std::uint64_t fClosingPosition = 0; ///< value of "at" in the frame's close ("C") event
+};
+
+/// Write `frames` to `output` as a speedscope JSON document that holds one "evented" profile measured in bytes.
+///
+/// The frames are consumed in order; every frame that follows frame i and opens before frames[i] closes is
+/// emitted as nested inside frame i. Names and type strings are written verbatim, without JSON escaping.
+static void PrintSpeedscopeFrames(const std::vector<SpeedscopeFrame> &frames, std::ostream &output)
+{
+   // The closing position of the last frame is used as the end of the profile; back() needs a non-empty list.
+   const std::uint64_t endPosition = frames.empty() ? 0 : frames.back().fClosingPosition;
+
+   output << "{\n";
+   output << " \"$schema\":\"https://www.speedscope.app/file-format-schema.json\",\n";
+   output << " \"shared\":{\n";
+   output << " \"frames\":[\n";
+
+   for (std::size_t i = 0; i < frames.size(); ++i) {
+      output << " { \"name\":\"" << frames[i].fPrimaryString
+             << "\", \"file\":\"Type: " << frames[i].fSecondaryString
+             << ", Size: " << frames[i].fClosingPosition - frames[i].fOpeningPosition << "B\" }"
+             << (i + 1 < frames.size() ? ",\n" : "\n");
+   }
+
+   output << " ]\n";
+   output << " },\n";
+   output << " \"profiles\":[\n";
+   output << " {\n";
+   output << " \"type\":\"evented\",\n";
+   output << " \"name\":\"Flattened Timeline\",\n";
+   output << " \"unit\":\"bytes\",\n";
+   output << " \"startValue\":0,\n";
+   output << " \"endValue\":" << endPosition << ",\n";
+   output << " \"events\":[\n";
+
+   bool first = true;
+
+   // Emit the open/close events of the frames starting at nextIdxToProcess that open before `limit`,
+   // descending into the frames nested inside each of them.
+   // Parameter nextIdxToProcess: index of the first frame to process
+   // Parameter limit:
+   //   - in a recursive call: closing position of the enclosing frame
+   //   - in the initial call: closing position of the last element of frames
+   // Returns the index of the first frame that was not processed
+   // `limit` is 64 bits wide like the frame positions; a 32-bit limit would truncate positions of 4 GiB and above.
+   // NOTE(review): a frame that opens exactly at `limit` ends the loop, so a zero-size frame located at the very
+   // end of its parent is not nested under it, and emits no events at all when `limit` is the end of the profile.
+   std::function<std::size_t(std::size_t, std::uint64_t)> processRecursive = [&](std::size_t nextIdxToProcess,
+                                                                                 std::uint64_t limit) -> std::size_t {
+      while (nextIdxToProcess < frames.size() && frames[nextIdxToProcess].fOpeningPosition < limit) {
+         const std::size_t currentIdx = nextIdxToProcess;
+
+         if (!first)
+            output << ",\n";
+
+         output << " {\"type\":\"O\",\"frame\":" << currentIdx
+                << ",\"at\":" << frames[currentIdx].fOpeningPosition << "}";
+         first = false;
+
+         nextIdxToProcess = processRecursive(nextIdxToProcess + 1, frames[currentIdx].fClosingPosition);
+
+         output << ",\n {\"type\":\"C\",\"frame\":" << currentIdx
+                << ",\"at\":" << frames[currentIdx].fClosingPosition << "}";
+      }
+      return nextIdxToProcess;
+   };
+
+   processRecursive(0, endPosition);
+
+   output << "\n ]\n";
+   output << " }\n";
+   output << " ]\n";
+   output << "}\n";
+}
+} // namespace
+
+void ROOT::Experimental::RNTupleInspector::PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output) const
+{
+   // There is only one format at the moment
+   assert(format == ESchemaProfileFormat::kSpeedscopeJSON);
+
+   const auto &tupleDescriptor = GetDescriptor();
+   ROOT::DescriptorId_t rootId = tupleDescriptor.GetFieldZeroId();
+   const auto &rootFieldDescriptor = tupleDescriptor.GetFieldDescriptor(rootId);
+
+   std::vector<SpeedscopeFrame> frames;
+   // Running sum of the compressed sizes of the columns visited so far
+   std::uint64_t positionCursor = 0;
+
+   // Appends a frame for the field, then the frames of its subfields (recursively), then those of its own columns.
+   // Returns size of the visited field
+   auto visitFieldsRecursive = [&](auto &self, const ROOT::RFieldDescriptor &fieldDescriptor) -> std::size_t {
+      SpeedscopeFrame fieldSpeedscopeFrame;
+      fieldSpeedscopeFrame.fPrimaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId());
+      fieldSpeedscopeFrame.fSecondaryString = fieldDescriptor.GetTypeName();
+      fieldSpeedscopeFrame.fOpeningPosition = positionCursor;
+      frames.push_back(fieldSpeedscopeFrame);
+
+      const std::size_t fieldSpeedscopeFrameIndex = frames.size() - 1;
+
+      std::size_t subTreeSize = 0;
+      const auto &childIds = fieldDescriptor.GetLinkIds();
+
+      for (const auto &childFieldId : childIds) {
+         const auto &childFieldDescriptor = tupleDescriptor.GetFieldDescriptor(childFieldId);
+         subTreeSize += self(self, childFieldDescriptor);
+      }
+
+      for (const auto &columnDescriptor : tupleDescriptor.GetColumnIterable(fieldDescriptor.GetId())) {
+         const auto &columnInfo = GetColumnInspector(columnDescriptor.GetPhysicalId());
+         std::size_t columnSize = columnInfo.GetCompressedSize();
+
+         SpeedscopeFrame columnSpeedscopeFrame;
+         columnSpeedscopeFrame.fPrimaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId()) +
+                                                " [col#" + std::to_string(columnDescriptor.GetPhysicalId()) + "]";
+         columnSpeedscopeFrame.fSecondaryString =
+            ROOT::Internal::RColumnElementBase::GetColumnTypeName(columnDescriptor.GetType());
+         columnSpeedscopeFrame.fOpeningPosition = positionCursor;
+         positionCursor += columnSize;
+         columnSpeedscopeFrame.fClosingPosition = positionCursor;
+         frames.push_back(columnSpeedscopeFrame);
+         subTreeSize += columnSize;
+      }
+
+      // The field frame was pushed before its descendants; it is closed only now that the cursor has passed them.
+      frames[fieldSpeedscopeFrameIndex].fClosingPosition = positionCursor;
+
+      return subTreeSize;
+   };
+
+   visitFieldsRecursive(visitFieldsRecursive, rootFieldDescriptor);
+
+   PrintSpeedscopeFrames(frames, output);
+}
diff --git a/tree/ntupleutil/test/ntuple_inspector.cxx b/tree/ntupleutil/test/ntuple_inspector.cxx
index 5812a926eb9ee..7d5f439e9281b 100644
--- a/tree/ntupleutil/test/ntuple_inspector.cxx
+++ b/tree/ntupleutil/test/ntuple_inspector.cxx
@@ -862,3 +862,59 @@ TEST(RNTupleInspector, FieldTreeAsDot)
 "int

Type: std::int32_t

ID: 1

>]\n}";
    EXPECT_EQ(dot, expected);
 }
+
+TEST(RNTupleInspector, SchemaProfile) // compares the printed speedscope JSON of a two-field ntuple, byte for byte
+{
+   FileRaii fileGuard("test_schema_profile.root");
+   {
+      auto model = RNTupleModel::Create();
+      auto fieldFloat1 = model->MakeField<float>("float1"); // matches the expected "Type: float"
+      auto fieldInt = model->MakeField<std::int32_t>("int"); // matches the expected "Type: std::int32_t"
+      auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());
+
+      for (int i = 0; i < 10; ++i) {
+         *fieldFloat1 = 3.14f * i;
+         *fieldInt = 42 * i;
+         writer->Fill();
+      }
+   }
+   auto inspector = RNTupleInspector::Create("ntuple", fileGuard.GetPath());
+   std::ostringstream schemaProfileStream;
+   inspector->PrintSchemaProfile(ROOT::Experimental::ESchemaProfileFormat::kSpeedscopeJSON, schemaProfileStream);
+   const std::string schemaProfile = schemaProfileStream.str();
+   const std::string expected = R"({
+ "$schema":"https://www.speedscope.app/file-format-schema.json",
+ "shared":{
+ "frames":[
+ { "name":"", "file":"Type: , Size: 80B" },
+ { "name":"float1", "file":"Type: float, Size: 40B" },
+ { "name":"float1 [col#0]", "file":"Type: SplitReal32, Size: 40B" },
+ { "name":"int", "file":"Type: std::int32_t, Size: 40B" },
+ { "name":"int [col#1]", "file":"Type: SplitInt32, Size: 40B" }
+ ]
+ },
+ "profiles":[
+ {
+ "type":"evented",
+ "name":"Flattened Timeline",
+ "unit":"bytes",
+ "startValue":0,
+ "endValue":80,
+ "events":[
+ {"type":"O","frame":0,"at":0},
+ {"type":"O","frame":1,"at":0},
+ {"type":"O","frame":2,"at":0},
+ {"type":"C","frame":2,"at":40},
+ {"type":"C","frame":1,"at":40},
+ {"type":"O","frame":3,"at":40},
+ {"type":"O","frame":4,"at":40},
+ {"type":"C","frame":4,"at":80},
+ {"type":"C","frame":3,"at":80},
+ {"type":"C","frame":0,"at":80}
+ ]
+ }
+ ]
+}
+)";
+   EXPECT_EQ(schemaProfile, expected); // the printer terminates every line with '\n', including the last one
+}