From ce8625de289f0956041c9027fe61da286be41dea Mon Sep 17 00:00:00 2001 From: Gabriel Pregger Date: Tue, 10 Feb 2026 14:00:08 +0100 Subject: [PATCH 1/2] initial support for hdf5 superblock parsing --- src/file_hdf5.c | 88 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 7 deletions(-) diff --git a/src/file_hdf5.c b/src/file_hdf5.c index 43840345..97592eae 100644 --- a/src/file_hdf5.c +++ b/src/file_hdf5.c @@ -34,7 +34,7 @@ #include "types.h" #include "filegen.h" #include "common.h" -#ifdef DEBUG_HDF +#ifdef DEBUG_HDF5 #include "log.h" #endif @@ -42,9 +42,9 @@ static void register_header_check_hdf5(file_stat_t *file_stat); const file_hint_t file_hint_hdf5= { - .extension="hdf", + .extension="h5", .description="Hierarchical Data Format 5", - .max_filesize=PHOTOREC_MAX_SIZE_32, + .max_filesize=PHOTOREC_MAX_FILE_SIZE, .recover=1, .enable_by_default=1, .register_header_check=&register_header_check_hdf5 @@ -54,7 +54,66 @@ struct hdf5_superblock { uint8_t signature[8]; uint8_t version; -}; + uint8_t version_global_free_space_storage; + uint8_t version_root_group_symbol_table_entry; + uint8_t reserved; + uint8_t version_shared_header_message_format; + uint8_t offsets_size; + uint8_t lengths_size; + // if version == 0, Base Address etc start at offset 24, if version >1 at offset 28 + // Offset Base Address = (bool)version*4 + 24 + 0*offsets_size + // Offset Address of Global Free-Space Heap = (bool)version*4 + 24 + 1*offset_size + // Offset End of File Address = (bool)version*4 + 24 + 2*offset_size + // Size of End of File Address = offset_size +} __attribute__ ((gcc_struct, __packed__)); + +/*@ + @ requires \separated(file_recovery, file_recovery->handle, &errno, &Frama_C_entropy_source, &__fc_heap_status); + @ requires valid_file_check_param(file_recovery); + @ ensures valid_file_check_result(file_recovery); + @*/ +static void file_check_hdf5(file_recovery_t *file_recovery) +{ +#ifdef DEBUG_HDF5 + log_info("HDF5: file_check_hdf5\n"); +#endif
+ FILE *handle = file_recovery->handle; + + uint8_t sb_version_offset = 8; + uint8_t sb_offset_size_offset = 0x0D; + uint8_t sb_meta_base_address_offset = 0x1C; + /* Get superblock version */ + my_fseek(handle, sb_version_offset, SEEK_SET); + uint8_t sb_version = 0; + fread(&sb_version, 1, 1, handle); +#ifdef DEBUG_HDF5 + log_info("HDF5: file_check_hdf5: superblock version = %u\n", sb_version); +#endif + /* Adjust sb_meta_base_address_offset if necessary */ + if (!sb_version) + sb_meta_base_address_offset = 0x18; +#ifdef DEBUG_HDF5 + log_info("HDF5: file_check_hdf5: dec sb_meta_base_address_offset = %u\n", sb_meta_base_address_offset); + log_info("HDF5: file_check_hdf5: hex sb_meta_base_address_offset = 0x%02X\n", sb_meta_base_address_offset); +#endif + /* Get size of offsets */ + my_fseek(handle, sb_offset_size_offset, SEEK_SET); + uint8_t sb_offset_size = 0; + fread(&sb_offset_size, 1, 1, handle); +#ifdef DEBUG_HDF5 + log_info("HDF5: file_check_hdf5: sb_offset_size = %u\n", sb_offset_size); +#endif + /* Get EOF Address */ + uint8_t eof_address_offset = sb_meta_base_address_offset + 2*sb_offset_size; + my_fseek(handle, eof_address_offset, SEEK_SET); + uint64_t eof_address = 0; + fread(&eof_address, sb_offset_size, 1, handle); +#ifdef DEBUG_HDF5 + log_info("HDF5: file_check_hdf5: dec eof_address = %lu\n", (long unsigned)eof_address); + log_info("HDF5: file_check_hdf5: hex eof_address = 0x%02lX\n", eof_address); +#endif + file_recovery->file_size=eof_address; +} /*@ @ requires buffer_size >= sizeof(struct hdf5_superblock); @@ -65,15 +124,30 @@ struct hdf5_superblock @*/ static int header_check_hdf5(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new) { +#ifdef DEBUG_HDF5 + log_info("HDF5: header_check_hdf5\n"); +#endif const struct hdf5_superblock *sb=(const struct hdf5_superblock*)&buffer[0]; - /*@ assert \valid_read(sb); */ - if(sb->version > 2) - 
return 0; + const uint8_t sb_version=sb->version; + const uint8_t offsets_size=sb->offsets_size; +#ifdef DEBUG_HDF5 + log_info("HDF5: header_check_hdf5: size of offsets = %i\n", (int)offsets_size); + log_info("HDF5: header_check_hdf5: superblock version = %i\n", (int)sb_version); +#endif + uint8_t meta_base = 24; + if (sb_version) + meta_base = 28; + const uint8_t eof_address_offset = meta_base + 2*offsets_size; +#ifdef DEBUG_HDF5 + log_info("HDF5: header_check_hdf5: eof_address_offset = 0x%02X\n", (int)eof_address_offset); +#endif reset_file_recovery(file_recovery_new); file_recovery_new->extension=file_hint_hdf5.extension; + file_recovery_new->file_check=&file_check_hdf5; return 1; } + static void register_header_check_hdf5(file_stat_t *file_stat) { static const unsigned char hdf5_header[8]= { 0x89, 'H', 'D', 'F', '\r', '\n', 0x1a, '\n'}; From 9e75cde132ca7d5e2528a6f2797f8c0cb8945ddf Mon Sep 17 00:00:00 2001 From: Gabriel Pregger Date: Thu, 5 Mar 2026 10:25:23 +0100 Subject: [PATCH 2/2] conform with ISO C 90 (hopefully), catch errors on stream seek / read, clean up header_check --- src/file_hdf5.c | 85 +++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/src/file_hdf5.c b/src/file_hdf5.c index 97592eae..ca62df45 100644 --- a/src/file_hdf5.c +++ b/src/file_hdf5.c @@ -19,7 +19,7 @@ Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ - + #if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_hdf5) #ifdef HAVE_CONFIG_H #include <config.h> @@ -74,43 +74,57 @@ struct hdf5_superblock @*/ static void file_check_hdf5(file_recovery_t *file_recovery) { -#ifdef DEBUG_HDF5 - log_info("HDF5: file_check_hdf5\n"); -#endif FILE *handle = file_recovery->handle; - uint8_t sb_version_offset = 8; - uint8_t sb_offset_size_offset = 0x0D; - uint8_t sb_meta_base_address_offset = 0x1C; - /* Get superblock version */ - my_fseek(handle, sb_version_offset, SEEK_SET); + uint8_t sb_offsets_size_offset = 0x0D; + uint8_t sb_meta_base_address_offset = 0x18; uint8_t sb_version = 0; - fread(&sb_version, 1, 1, handle); + uint8_t sb_offsets_size = 0; + uint8_t eof_address_offset; + uint64_t eof_address = 0; + /* Get superblock version */ + if (my_fseek(handle, sb_version_offset, SEEK_SET) < 0 || + fread(&sb_version, 1, 1, handle) != 1) + { +#ifdef DEBUG_HDF5 + log_error("HDF5: Couldn't read superblock version"); +#endif + return; + } #ifdef DEBUG_HDF5 - log_info("HDF5: file_check_hdf5: superblock version = %u\n", sb_version); + log_info("HDF5: superblock version = %u\n", sb_version); #endif - /* Adjust sb_meta_base_address_offset if necessary */ - if (!sb_version) - sb_meta_base_address_offset = 0x18; + /* Adjust sb_meta_base_address_offset if hdf5 version is >0 */ + if (sb_version) + sb_meta_base_address_offset = 0x1C; #ifdef DEBUG_HDF5 - log_info("HDF5: file_check_hdf5: dec sb_meta_base_address_offset = %u\n", sb_meta_base_address_offset); - log_info("HDF5: file_check_hdf5: hex sb_meta_base_address_offset = 0x%02X\n", sb_meta_base_address_offset); + log_info("HDF5: sb_meta_base_address_offset = 0x%02X\n", sb_meta_base_address_offset); #endif /* Get size of offsets */ - my_fseek(handle, sb_offset_size_offset, SEEK_SET); - uint8_t sb_offset_size = 0; - fread(&sb_offset_size, 1, 1, handle); + if (my_fseek(handle, sb_offsets_size_offset, SEEK_SET) < 0 || + fread(&sb_offsets_size, 1, 1, handle) != 1) + { +#ifdef DEBUG_HDF5 + log_error("HDF5: 
Couldn't read HDF Size of Offsets"); +#endif + return; + } #ifdef DEBUG_HDF5 - log_info("HDF5: file_check_hdf5: sb_offset_size = %u\n", sb_offset_size); + log_info("HDF5: sb_offsets_size = %u\n", sb_offsets_size); #endif /* Get EOF Address */ - uint8_t eof_address_offset = sb_meta_base_address_offset + 2*sb_offset_size; - my_fseek(handle, eof_address_offset, SEEK_SET); - uint64_t eof_address = 0; - fread(&eof_address, sb_offset_size, 1, handle); + eof_address_offset = sb_meta_base_address_offset + 2*sb_offsets_size; + if (my_fseek(handle, eof_address_offset, SEEK_SET) < 0 || + fread(&eof_address, sb_offsets_size, 1, handle) != 1) + { +#ifdef DEBUG_HDF5 + log_error("HDF5: Couldn't read HDF End of File Address"); +#endif + return; + } #ifdef DEBUG_HDF5 - log_info("HDF5: file_check_hdf5: dec eof_address = %lu\n", (long unsigned)eof_address); - log_info("HDF5: file_check_hdf5: hex eof_address = 0x%02lX\n", eof_address); + log_info("HDF5: dec eof_address = %lu\n", (long unsigned)eof_address); + log_info("HDF5: hex eof_address = 0x%02lX\n", eof_address); #endif file_recovery->file_size=eof_address; } @@ -124,30 +138,17 @@ static void file_check_hdf5(file_recovery_t *file_recovery) @*/ static int header_check_hdf5(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new) { -#ifdef DEBUG_HDF5 - log_info("HDF5: header_check_hdf5\n"); -#endif const struct hdf5_superblock *sb=(const struct hdf5_superblock*)&buffer[0]; const uint8_t sb_version=sb->version; - const uint8_t offsets_size=sb->offsets_size; -#ifdef DEBUG_HDF5 - log_info("HDF5: header_check_hdf5: size of offsets = %i\n", (int)offsets_size); - log_info("HDF5: header_check_hdf5: superblock version = %i\n", (int)sb_version); -#endif - uint8_t meta_base = 24; - if (sb_version) - meta_base = 28; - const uint8_t eof_address_offset = meta_base + 2*offsets_size; -#ifdef DEBUG_HDF5 - log_info("HDF5: 
header_check_hdf5: eof_address_offset = 0x%02X\n", (int)eof_address_offset); -#endif + if (sb_version > 1) { + return 0; + } reset_file_recovery(file_recovery_new); file_recovery_new->extension=file_hint_hdf5.extension; file_recovery_new->file_check=&file_check_hdf5; return 1; } - static void register_header_check_hdf5(file_stat_t *file_stat) { static const unsigned char hdf5_header[8]= { 0x89, 'H', 'D', 'F', '\r', '\n', 0x1a, '\n'};