@@ -122,7 +122,7 @@ void DataInputDescriptor::addFileNameHolder(FileNameHolder* fn)
122122 mfilenames.emplace_back (fn);
123123}
124124
125- bool DataInputDescriptor::setFile (int counter, std::string_view origin)
125+ bool DataInputDescriptor::setFile (int counter, int wantedParentLevel, std::string_view origin)
126126{
127127 // no files left
128128 if (counter >= getNumberInputfiles ()) {
@@ -133,7 +133,9 @@ bool DataInputDescriptor::setFile(int counter, std::string_view origin)
133133 // of the filename. In the future we might expand this for proper rewriting of the
134134 // filename based on the origin and the original file information.
135135 std::string filename = mfilenames[counter]->fileName ;
136- if (!origin.starts_with (" AOD" )) {
136+ // In case we do not need to remap parent levels, the requested origin is what
137+ // drives the filename.
138+ if (wantedParentLevel == -1 && !origin.starts_with (" AOD" )) {
137139 filename = std::regex_replace (filename, std::regex (" [.]root$" ), fmt::format (" _{}.root" , origin));
138140 }
139141
@@ -146,7 +148,19 @@ bool DataInputDescriptor::setFile(int counter, std::string_view origin)
146148 closeInputFile ();
147149 }
148150
149- mCurrentFilesystem = std::make_shared<TFileFileSystem>(TFile::Open (filename.c_str ()), 50 * 1024 * 1024 , mFactory );
151+ TFile* tfile = nullptr ;
152+ bool externalFile = false ;
153+ for (auto & [name, f] : mContext .openFiles ) {
154+ if (name == filename) {
155+ tfile = f;
156+ externalFile = true ;
157+ break ;
158+ }
159+ }
160+ if (tfile == nullptr ) {
161+ tfile = TFile::Open (filename.c_str ());
162+ }
163+ mCurrentFilesystem = std::make_shared<TFileFileSystem>(tfile, 50 * 1024 * 1024 , mFactory , !externalFile);
150164 if (!mCurrentFilesystem .get ()) {
151165 throw std::runtime_error (fmt::format (" Couldn't open file \" {}\" !" , filename));
152166 }
@@ -218,11 +232,11 @@ bool DataInputDescriptor::setFile(int counter, std::string_view origin)
218232 return true ;
219233}
220234
221- uint64_t DataInputDescriptor::getTimeFrameNumber (int counter, int numTF, std::string_view origin )
235+ uint64_t DataInputDescriptor::getTimeFrameNumber (int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin )
222236{
223237
224238 // open file
225- if (!setFile (counter, origin )) {
239+ if (!setFile (counter, wantedParentLevel, wantedOrigin )) {
226240 return 0ul ;
227241 }
228242
@@ -234,10 +248,32 @@ uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF, std::st
234248 return (mfilenames[counter]->listOfTimeFrameNumbers )[numTF];
235249}
236250
237- arrow::dataset::FileSource DataInputDescriptor::getFileFolder (int counter, int numTF, std::string_view origin)
251+ std::pair<DataInputDescriptor*, int > DataInputDescriptor::navigateToLevel (int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
252+ {
253+ if (!setFile (counter, wantedParentLevel, wantedOrigin)) {
254+ return {nullptr , -1 };
255+ }
256+ auto folderName = fmt::format (" DF_{}" , mfilenames[counter]->listOfTimeFrameNumbers [numTF]);
257+ auto parentFile = getParentFile (counter, numTF, " " , wantedParentLevel, wantedOrigin);
258+ if (parentFile == nullptr ) {
259+ return {nullptr , -1 };
260+ }
261+ return {parentFile, parentFile->findDFNumber (0 , folderName)};
262+ }
263+
264+ arrow::dataset::FileSource DataInputDescriptor::getFileFolder (int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
238265{
266+ // If mapped to a parent level deeper than current, skip directly to the right level.
267+ if (wantedParentLevel != -1 && mLevel < wantedParentLevel) {
268+ auto [parentFile, parentNumTF] = navigateToLevel (counter, numTF, wantedParentLevel, wantedOrigin);
269+ if (parentFile == nullptr || parentNumTF == -1 ) {
270+ return {};
271+ }
272+ return parentFile->getFileFolder (0 , parentNumTF, wantedParentLevel, wantedOrigin);
273+ }
274+
239275 // open file
240- if (!setFile (counter, origin )) {
276+ if (!setFile (counter, wantedParentLevel, wantedOrigin )) {
241277 return {};
242278 }
243279
@@ -251,7 +287,7 @@ arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int n
251287 return {fmt::format (" DF_{}" , mfilenames[counter]->listOfTimeFrameNumbers [numTF]), mCurrentFilesystem };
252288}
253289
254- DataInputDescriptor* DataInputDescriptor::getParentFile (int counter, int numTF, std::string treename, std::string_view origin )
290+ DataInputDescriptor* DataInputDescriptor::getParentFile (int counter, int numTF, std::string treename, int wantedParentLevel, std::string_view wantedOrigin )
255291{
256292 if (!mParentFileMap ) {
257293 // This file has no parent map
@@ -288,7 +324,7 @@ DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF,
288324 mParentFile ->mdefaultFilenamesPtr = new std::vector<FileNameHolder*>;
289325 mParentFile ->mdefaultFilenamesPtr ->emplace_back (makeFileNameHolder (parentFileName->GetString ().Data ()));
290326 mParentFile ->fillInputfiles ();
291- mParentFile ->setFile (0 , origin );
327+ mParentFile ->setFile (0 , wantedParentLevel, wantedOrigin );
292328 return mParentFile ;
293329}
294330
@@ -450,8 +486,26 @@ struct CalculateDelta {
450486bool DataInputDescriptor::readTree (DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t & totalSizeCompressed, size_t & totalSizeUncompressed)
451487{
452488 CalculateDelta t (mIOTime );
453- std::string origin = dh.dataOrigin .as <std::string>();
454- auto folder = getFileFolder (counter, numTF, origin);
489+ std::string wantedOrigin = dh.dataOrigin .as <std::string>();
490+ int wantedLevel = mContext .levelForOrigin (wantedOrigin);
491+
492+ // If this origin is mapped to a parent level deeper than current, skip directly without
493+ // attempting to read from this level.
494+ if (wantedLevel != -1 && mLevel < wantedLevel) {
495+ auto [parentFile, parentNumTF] = navigateToLevel (counter, numTF, wantedLevel, wantedOrigin);
496+ if (parentFile == nullptr ) {
497+ auto rootFS = std::dynamic_pointer_cast<TFileFileSystem>(mCurrentFilesystem );
498+ throw std::runtime_error (fmt::format (R"( No parent file found for "{}" while looking for level {} in "{}")" , treename, wantedLevel, rootFS->GetFile ()->GetName ()));
499+ }
500+ if (parentNumTF == -1 ) {
501+ auto parentRootFS = std::dynamic_pointer_cast<TFileFileSystem>(parentFile->mCurrentFilesystem );
502+ throw std::runtime_error (fmt::format (R"( DF not found in parent file "{}")" , parentRootFS->GetFile ()->GetName ()));
503+ }
504+ t.deactivate ();
505+ return parentFile->readTree (outputs, dh, 0 , parentNumTF, treename, totalSizeCompressed, totalSizeUncompressed);
506+ }
507+
508+ auto folder = getFileFolder (counter, numTF, wantedLevel, wantedOrigin);
455509 if (!folder.filesystem ()) {
456510 t.deactivate ();
457511 return false ;
@@ -484,7 +538,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh
484538 if (!format) {
485539 t.deactivate ();
486540 LOGP (debug, " Could not find tree {}. Trying in parent file." , fullpath.path ());
487- auto parentFile = getParentFile (counter, numTF, treename, origin );
541+ auto parentFile = getParentFile (counter, numTF, treename, wantedLevel, wantedOrigin );
488542 if (parentFile != nullptr ) {
489543 int parentNumTF = parentFile->findDFNumber (0 , folder.path ());
490544 if (parentNumTF == -1 ) {
@@ -817,8 +871,9 @@ arrow::dataset::FileSource DataInputDirector::getFileFolder(header::DataHeader d
817871 didesc = mdefaultDataInputDescriptor;
818872 }
819873 std::string origin = dh.dataOrigin .as <std::string>();
874+ int wantedLevel = mContext .levelForOrigin (origin);
820875
821- return didesc->getFileFolder (counter, numTF, origin);
876+ return didesc->getFileFolder (counter, numTF, wantedLevel, origin);
822877}
823878
824879int DataInputDirector::getTimeFramesInFile (header::DataHeader dh, int counter)
@@ -840,8 +895,9 @@ uint64_t DataInputDirector::getTimeFrameNumber(header::DataHeader dh, int counte
840895 didesc = mdefaultDataInputDescriptor;
841896 }
842897 std::string origin = dh.dataOrigin .as <std::string>();
898+ int wantedLevel = mContext .levelForOrigin (origin);
843899
844- return didesc->getTimeFrameNumber (counter, numTF, origin);
900+ return didesc->getTimeFrameNumber (counter, numTF, wantedLevel, origin);
845901}
846902
847903bool DataInputDirector::readTree (DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t & totalSizeCompressed, size_t & totalSizeUncompressed)
0 commit comments