From 60701b1dacc445cc73f572f725c908d9c23c57e0 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:32:19 -0500 Subject: [PATCH 01/25] [idb_import] Rebase BaseSection against lowest segment, not lowest section When an IDB only records a section-relative base address (loading_base of zero, so we fall back to min_ea as a BaseSection), the rebase delta was computed against the lowest mapped *section* in the view. That over-shifts every imported address for formats where the first section starts after the file header. IDA's min_ea is the image base and maps the file header too, whereas a Mach-O's first section (__text) begins after the header and load commands. Aligning against the lowest mapped segment (segments include the header region) yields the correct delta and stops imported addresses from being shifted by the header size. --- plugins/idb_import/src/mapper.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 530fef1f06..593fdb75cf 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -45,13 +45,18 @@ impl IDBMapper { BaseAddressInfo::None => bn_base_address, BaseAddressInfo::BaseSegment(start_addr) => bn_base_address.wrapping_sub(start_addr), BaseAddressInfo::BaseSection(section_addr) => { - let bn_section_addr = view - .sections() + // Align against the lowest mapped *segment*, not the lowest section. + // IDA's `min_ea` is the image base (it maps the file header too), but a + // Mach-O's first section (`__text`) starts after the header + load + // commands. Using the lowest section here over-shifts every imported + // address by the header size; segments include the header region. + let bn_segment_addr = view + .segments() .iter() - .min_by_key(|s| s.start()) - .map(|s| s.start()); - match bn_section_addr { - Some(bn_section) => bn_section.wrapping_sub(section_addr), + .map(|s| s.address_range().start) + .min(); + match bn_segment_addr { + Some(bn_segment) => bn_segment.wrapping_sub(section_addr), None => bn_base_address, } } From 125f3867c4fe06e428b477133ceec57eed17cdef Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:32:38 -0500 Subject: [PATCH 02/25] [idb_import] Remove orphaned types.rs translator The "IDB Import refactor" introduced translate.rs (TILTranslator) as the type translator used by the mapper, but left the previous translator in types.rs behind. The module was never re-declared in lib.rs, so it has not been compiled or referenced since the refactor. Removing it drops a large block of dead code along with its stale TODOs; TILTranslator is now the single source of truth for IDB->BN type translation. --- plugins/idb_import/src/types.rs | 770 -------------------------------- 1 file changed, 770 deletions(-) delete mode 100644 plugins/idb_import/src/types.rs diff --git a/plugins/idb_import/src/types.rs b/plugins/idb_import/src/types.rs deleted file mode 100644 index cbd37f66c8..0000000000 --- a/plugins/idb_import/src/types.rs +++ /dev/null @@ -1,770 +0,0 @@ -use std::collections::HashMap; -use std::num::{NonZeroU16, NonZeroU8}; - -use anyhow::{anyhow, Result}; -use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; -use binaryninja::binary_view::{BinaryView}; -use binaryninja::calling_convention::CoreCallingConvention; -use binaryninja::confidence::Conf; -use binaryninja::rc::Ref; -use binaryninja::types::{ - EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, StructureBuilder, - StructureType, Type, -}; -use idb_rs::til::function::CallingConvention as TILCallingConvention; -use idb_rs::til::pointer::Pointer as TILPointer; -use idb_rs::til::r#enum::EnumMembers; -use idb_rs::til::{ - array::Array as TILArray, function::Function as TILFunction, r#enum::Enum as TILEnum, - r#struct::Struct as TILStruct, r#struct::StructMember as TILStructMember, - r#union::Union as TILUnion, section::TILSection, TILTypeInfo, Type as TILType, - TypeVariant as TILTypeVariant, -}; -use idb_rs::IDBString; - -#[derive(Debug, Clone)] -pub enum BnTypeError { - // TODO delete this and make this verification during the TIL/IDB parsing, translating the ordinal - // into a kind of type_idx - OrdinalNotFound(u32), - NameNotFound(String), - - Typedef(Box), - Function(BnTypeFunctionError), - Array(Box), - Pointer(Box), - /// Error for members - Struct(Vec<(usize, BnTypeError)>), - Union(Vec<(usize, BnTypeError)>), - - // can't create function due to missing CallingConvention - MissingArchCC, -} - -#[derive(Default, Debug, Clone)] -pub struct BnTypeFunctionError { - pub ret: Option>, - pub args: Vec<(usize, BnTypeError)>, -} - -impl std::fmt::Display for BnTypeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - BnTypeError::OrdinalNotFound(i) => write!(f, "Reference to non existing Ordinal {i}"), - BnTypeError::NameNotFound(name) => write!(f, "Reference to non existing name {name}"), - BnTypeError::Typedef(error) => write!(f, "Typedef: {error}"), - BnTypeError::Function(BnTypeFunctionError { ret, args }) => { - if let Some(error) = ret { - write!(f, "Function return: {error} ")?; - } - for (i, error) in args { - write!(f, "Function argument {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Array(error) => write!(f, "Array: {error}"), - BnTypeError::Struct(errors) => { - for (i, error) in errors { - write!(f, "Struct Member {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Union(errors) => { - for (i, error) in errors { - write!(f, "Union Member {i}: {error} ")?; - } - Ok(()) - } - BnTypeError::Pointer(error) => write!(f, "Pointer: {error}"), - BnTypeError::MissingArchCC => write!(f, "Arch is missing a default CallingConvention"), - } - } -} - -#[derive(Default)] -pub enum TranslateTypeResult { - #[default] - NotYet, - /// Unable to solve type, there is no point in trying again - Error(BnTypeError), - /// a type that is not final, but equivalent to the final type, if error, there is no - /// point in trying again - PartiallyTranslated(Ref, Option), - Translated(Ref), -} - -impl From, BnTypeError>> for TranslateTypeResult { - fn from(value: Result, BnTypeError>) -> Self { - match value { - Ok(ty) => Self::Translated(ty), - Err(error) => Self::Error(error), - } - } -} - -pub struct TranslatesIDBType<'a> { - // sanitized name from IDB - pub name: IDBString, - // the result, if converted - pub ty: TranslateTypeResult, - pub og_ty: &'a TILTypeInfo, - pub is_symbol: bool, -} - -pub struct TranslateIDBTypes<'a, F: Fn(usize, usize) -> Result<(), ()>> { - pub arch: CoreArchitecture, - pub progress: F, - pub til: &'a TILSection, - // note it's mapped 1:1 with the same index from til types.chain(symbols) - pub types: Vec>, -} - -impl Result<(), ()>> TranslateIDBTypes<'_, F> { - fn find_typedef_by_name(&self, name: &[u8]) -> Option { - if name.is_empty() { - // TODO this is my assumption, maybe an empty names Typedef means something else. - return Some(TranslateTypeResult::Translated(Type::void())); - } - - // check for types that ar usually not defined directly - match name { - b"Unkown" | b"uint8_t" => Some(TranslateTypeResult::Translated(Type::int(1, false))), - b"IUnkown" | b"int8_t" => Some(TranslateTypeResult::Translated(Type::int(1, true))), - b"SHORT" | b"USHORT" => Some(TranslateTypeResult::Translated(Type::int( - self.til.sizeof_short().get().into(), - name == b"SHORT", - ))), - b"int16_t" => Some(TranslateTypeResult::Translated(Type::int(2, true))), - b"uint16_t" => Some(TranslateTypeResult::Translated(Type::int(2, false))), - b"int32_t" => Some(TranslateTypeResult::Translated(Type::int(4, true))), - b"uint32_t" => Some(TranslateTypeResult::Translated(Type::int(4, false))), - b"int64_t" => Some(TranslateTypeResult::Translated(Type::int(8, true))), - b"uint64_t" => Some(TranslateTypeResult::Translated(Type::int(8, false))), - b"int128_t" => Some(TranslateTypeResult::Translated(Type::int(16, true))), - b"uint128_t" => Some(TranslateTypeResult::Translated(Type::int(16, false))), - _ => None, - } - } - - fn find_typedef(&self, ty: &TranslatesIDBType) -> TranslateTypeResult { - // only return a typedef, if it's solved, at least partially - match &ty.ty { - TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, - TranslateTypeResult::Error(error) => { - TranslateTypeResult::Error(BnTypeError::Typedef(Box::new(error.to_owned()))) - } - TranslateTypeResult::PartiallyTranslated(og_ty, error) => { - TranslateTypeResult::PartiallyTranslated( - Type::named_type_from_type(ty.name.as_utf8_lossy(), og_ty), - error - .as_ref() - .map(|x| BnTypeError::Typedef(Box::new(x.clone()))) - .clone(), - ) - } - TranslateTypeResult::Translated(og_ty) => TranslateTypeResult::Translated( - Type::named_type_from_type(ty.name.as_utf8_lossy(), og_ty), - ), - } - } - - fn translate_pointer(&self, ty: &TILPointer) -> TranslateTypeResult { - match self.translate_type(&ty.typ) { - TranslateTypeResult::Translated(trans) => { - TranslateTypeResult::Translated(self.inner_translate_pointer(ty, &trans)) - } - TranslateTypeResult::PartiallyTranslated(trans, error) => { - TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &trans), - error.map(|e| BnTypeError::Pointer(Box::new(e))), - ) - } - TranslateTypeResult::Error(error) => TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &Type::void()), - Some(error), - ), - TranslateTypeResult::NotYet => TranslateTypeResult::PartiallyTranslated( - self.inner_translate_pointer(ty, &Type::void()), - None, - ), - } - } - - fn inner_translate_pointer(&self, ty: &TILPointer, trans: &Type) -> Ref { - // TODO handle ty.shifted - // TODO handle ty.modifier - Type::pointer_with_options(&self.arch, trans, ty.typ.is_const, ty.typ.is_volatile, None) - } - - fn translate_function(&self, fun: &TILFunction) -> TranslateTypeResult { - let mut is_partial = false; - let mut errors: BnTypeFunctionError = Default::default(); - // funtions are always 0 len, so it's translated or partial(void) - let return_ty = match self.translate_type(&fun.ret) { - TranslateTypeResult::Translated(trans) => trans, - TranslateTypeResult::PartiallyTranslated(trans, error) => { - is_partial |= true; - errors.ret = error.map(Box::new); - trans - } - TranslateTypeResult::Error(error) => { - errors.ret = Some(Box::new(error)); - return TranslateTypeResult::PartiallyTranslated( - Type::void(), - Some(BnTypeError::Function(errors)), - ); - } - TranslateTypeResult::NotYet => { - return TranslateTypeResult::PartiallyTranslated(Type::void(), None) - } - }; - let mut partial_error_args = vec![]; - let mut bn_args = Vec::with_capacity(fun.args.len()); - for (i, fun_arg) in fun.args.iter().enumerate() { - let arg = match self.translate_type(&fun_arg.ty) { - TranslateTypeResult::Translated(trans) => trans, - TranslateTypeResult::PartiallyTranslated(trans, error) => { - is_partial = true; - if let Some(error) = error { - errors.args.push((i, error)); - } - trans - } - TranslateTypeResult::NotYet => { - return TranslateTypeResult::PartiallyTranslated(Type::void(), None) - } - TranslateTypeResult::Error(error) => { - partial_error_args.push((i, error)); - return TranslateTypeResult::PartiallyTranslated( - Type::void(), - Some(BnTypeError::Function(errors)), - ); - } - }; - // TODO create location from `arg_loc`? - let loc = None; - let name = fun_arg - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("arg_{i}")); - bn_args.push(FunctionParameter::new(arg, name, loc)); - } - - let var_args = matches!(fun.calling_convention, Some(TILCallingConvention::Ellipsis)); - let cc = fun - .calling_convention - .and_then(|cc| convert_cc(&self.arch, cc)) - .or_else(|| self.arch.get_default_calling_convention()) - .or_else(|| { - self.arch - .calling_conventions() - .iter() - .next() - .map(|x| x.clone()) - }); - let Some(cc) = cc else { - return TranslateTypeResult::Error(BnTypeError::MissingArchCC); - }; - let ty = Type::function_with_opts( - &return_ty, - &bn_args, - var_args, - cc, - Conf::new(0, binaryninja::confidence::MIN_CONFIDENCE), - ); - if is_partial { - let error = (errors.ret.is_some() || !errors.args.is_empty()) - .then_some(BnTypeError::Function(errors)); - TranslateTypeResult::PartiallyTranslated(ty, error) - } else { - assert!(errors.ret.is_none() && errors.args.is_empty()); - TranslateTypeResult::Translated(ty) - } - } - - // TODO can binja handle 0 sized array? There is a better translation? - fn translate_array(&self, array: &TILArray) -> TranslateTypeResult { - match self.translate_type(&array.elem_type) { - TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, - TranslateTypeResult::Translated(ty) => TranslateTypeResult::Translated(Type::array( - &ty, - array.nelem.map(NonZeroU16::get).unwrap_or(0).into(), - )), - TranslateTypeResult::PartiallyTranslated(ty, error) => { - TranslateTypeResult::PartiallyTranslated( - Type::array(&ty, array.nelem.map(NonZeroU16::get).unwrap_or(0).into()), - error.map(Box::new).map(BnTypeError::Array), - ) - } - TranslateTypeResult::Error(error) => { - TranslateTypeResult::Error(BnTypeError::Array(Box::new(error))) - } - } - } - - fn condensate_bitfields_from_struct( - &self, - offset: usize, - members_slice: &[TILStructMember], - struct_builder: &mut StructureBuilder, - ) { - if members_slice.is_empty() { - unreachable!() - } - let mut members = members_slice - .iter() - .map(|ty| match &ty.member_type.type_variant { - TILTypeVariant::Bitfield(b) => b, - _ => unreachable!(), - }) - .enumerate(); - let (_, first_field) = members.next().unwrap(); - let mut current_field_bytes = first_field.nbytes; - let mut current_field_bits: u32 = first_field.width.into(); - let mut start_idx = 0; - - let mut create_field = |start_idx, i, bytes| { - let name = if start_idx == i - 1 { - let member: &TILStructMember = &members_slice[i - 1]; - member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("bitfield_{}", offset + start_idx)) - } else { - format!("bitfield_{}_{}", offset + start_idx, offset + (i - 1)) - }; - let field = field_from_bytes(bytes); - struct_builder.append(&field, &name, MemberAccess::NoAccess, MemberScope::NoScope); - }; - - for (i, member) in members { - // starting a new field - let max_bits = u32::from(current_field_bytes.get()) * 8; - // this bitfield start a a new field, or can't contain other bitfields - // finish the previous and start a new - if current_field_bytes != member.nbytes - || max_bits < current_field_bits + u32::from(member.width) - { - create_field(start_idx, i, current_field_bytes.get().into()); - current_field_bytes = member.nbytes; - current_field_bits = 0; - start_idx = i; - } - - // just add the current bitfield into the field - current_field_bits += u32::from(member.width); - } - - if current_field_bits != 0 { - create_field( - start_idx, - members_slice.len(), - current_field_bytes.get().into(), - ); - } - } - - fn translate_struct(&self, ty_struct: &TILStruct) -> TranslateTypeResult { - if ty_struct.members.is_empty() { - // binary ninja crashes if you create an empty struct, because it divide by 0 - return TranslateTypeResult::Translated(Type::void()); - } - let mut is_partial = false; - let mut structure = StructureBuilder::new(); - if let Some(align) = ty_struct.alignment { - structure.alignment(align.get().into()); - } - structure.packed(ty_struct.is_unaligned && ty_struct.is_uknown_8); - - let mut errors = vec![]; - let mut first_bitfield_seq = None; - for (i, member) in ty_struct.members.iter().enumerate() { - match (&member.member_type.type_variant, first_bitfield_seq) { - // accumulate the bitfield to be condensated - (TILTypeVariant::Bitfield(_bit), None) => { - first_bitfield_seq = Some(i); - continue; - } - (TILTypeVariant::Bitfield(_bit), Some(_)) => continue, - - // condensate the bitfields into byte-wide fields - (_, Some(start_idx)) => { - first_bitfield_seq = None; - let members_bitrange = &ty_struct.members[start_idx..i]; - self.condensate_bitfields_from_struct( - start_idx, - members_bitrange, - &mut structure, - ); - } - - (_, None) => {} - } - - let mem = match self.translate_type(&member.member_type) { - TranslateTypeResult::Translated(ty) => ty, - TranslateTypeResult::PartiallyTranslated(partial_ty, error) => { - is_partial |= true; - if let Some(error) = error { - errors.push((i, error)); - } - partial_ty - } - TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, - TranslateTypeResult::Error(error) => { - errors.push((i, error)); - return TranslateTypeResult::Error(BnTypeError::Struct(errors)); - } - }; - //TODO handle member.alignment - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - structure.append(&mem, &name, MemberAccess::NoAccess, MemberScope::NoScope); - } - if let Some(start_idx) = first_bitfield_seq { - let members_bitrange = &ty_struct.members[start_idx..]; - self.condensate_bitfields_from_struct(start_idx, members_bitrange, &mut structure); - } - let bn_ty = Type::structure(&structure.finalize()); - if is_partial { - let partial_error = (!errors.is_empty()).then_some(BnTypeError::Struct(errors)); - TranslateTypeResult::PartiallyTranslated(bn_ty, partial_error) - } else { - assert!(errors.is_empty()); - TranslateTypeResult::Translated(bn_ty) - } - } - - fn translate_union(&self, ty_union: &TILUnion) -> TranslateTypeResult { - let mut is_partial = false; - let mut structure = StructureBuilder::new(); - structure.structure_type(StructureType::UnionStructureType); - let mut errors = vec![]; - for (i, member) in ty_union.members.iter().enumerate() { - // bitfields can be translated into complete fields - let mem = match &member.ty.type_variant { - TILTypeVariant::Bitfield(field) => field_from_bytes(field.nbytes.get().into()), - _ => match self.translate_type(&member.ty) { - TranslateTypeResult::Translated(ty) => ty, - TranslateTypeResult::Error(error) => { - errors.push((i, error)); - return TranslateTypeResult::Error(BnTypeError::Union(errors)); - } - TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, - TranslateTypeResult::PartiallyTranslated(partial, error) => { - is_partial |= true; - if let Some(error) = error { - errors.push((i, error)); - } - partial - } - }, - }; - - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - structure.append(&mem, &name, MemberAccess::NoAccess, MemberScope::NoScope); - } - let str_ref = structure.finalize(); - - let bn_ty = Type::structure(&str_ref); - if is_partial { - let partial_error = (!errors.is_empty()).then_some(BnTypeError::Struct(errors)); - TranslateTypeResult::PartiallyTranslated(bn_ty, partial_error) - } else { - assert!(errors.is_empty()); - TranslateTypeResult::Translated(bn_ty) - } - } - - fn translate_enum(&self, ty_enum: &TILEnum) -> Ref { - let mut eb = EnumerationBuilder::new(); - match &ty_enum.members { - EnumMembers::Regular(enum_members) => { - for (i, member) in enum_members.iter().enumerate() { - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - eb.insert(&name, member.value); - } - } - EnumMembers::Groups(enum_groups) => { - for group in enum_groups { - // TODO implement groups to the importer - for (i, member) in group.sub_fields.iter().enumerate() { - let name = member - .name - .as_ref() - .map(|name| name.as_utf8_lossy().to_string()) - .unwrap_or_else(|| format!("member_{i}")); - eb.insert(&name, member.value); - } - } - } - } - - Type::enumeration( - &eb.finalize(), - // TODO: This looks bad, look at the comment in [`Type::width`]. - // TODO check the default size of enum - ty_enum - .storage_size - .map(|x| x.into()) - .or(self.til.header.size_enum.map(|x| x.into())) - .unwrap_or(4.try_into().unwrap()), - ty_enum.is_signed, - ) - } - - fn translate_basic(&self, mdata: &idb_rs::til::Basic) -> Ref { - match *mdata { - idb_rs::til::Basic::Void => Type::void(), - idb_rs::til::Basic::Unknown { bytes: 0 } => Type::void(), - idb_rs::til::Basic::Unknown { bytes } => Type::array(&Type::char(), bytes.into()), - idb_rs::til::Basic::Bool if self.til.header.size_bool.get() == 1 => Type::bool(), - idb_rs::til::Basic::BoolSized { bytes } if bytes.get() == 1 => Type::bool(), - // NOTE Binja don't have any representation for bool other then the default - idb_rs::til::Basic::BoolSized { bytes } => Type::int(bytes.get().into(), false), - idb_rs::til::Basic::Bool /*if self.til.header.size_bool.get() != 1*/ => Type::int(self.til.header.size_bool.get().into(), false), - idb_rs::til::Basic::Char => Type::char(), - // TODO what exacly is Segment Register? - idb_rs::til::Basic::SegReg => Type::char(), - idb_rs::til::Basic::IntSized { bytes, is_signed } => { - // default into signed - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(bytes.get().into(), is_signed) - } - idb_rs::til::Basic::Int { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.header.size_int.get().into(), is_signed) - }, - idb_rs::til::Basic::Short { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_short().get().into(), is_signed) - }, - idb_rs::til::Basic::Long { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_long().get().into(), is_signed) - }, - idb_rs::til::Basic::LongLong { is_signed } => { - let is_signed = is_signed.as_ref().copied().unwrap_or(true); - Type::int(self.til.sizeof_long_long().get().into(), is_signed) - }, - idb_rs::til::Basic::LongDouble => { - // TODO is size_long_double architecture dependent? - Type::float(self.til.header.size_long_double.map(NonZeroU8::get).unwrap_or(8).into()) - }, - idb_rs::til::Basic::Float { bytes } => Type::float(bytes.get().into()), - } - } - - pub fn translate_type(&self, ty: &TILType) -> TranslateTypeResult { - match &ty.type_variant { - // types that are always translatable - TILTypeVariant::Basic(meta) => { - TranslateTypeResult::Translated(self.translate_basic(meta)) - } - TILTypeVariant::Bitfield(bit) => { - TranslateTypeResult::Translated(field_from_bytes(bit.nbytes.get().into())) - } - TILTypeVariant::Enum(ty_enum) => { - TranslateTypeResult::Translated(self.translate_enum(ty_enum)) - } - TILTypeVariant::Typeref(typeref) => match &typeref.typeref_value { - idb_rs::til::TyperefValue::Ref(idx) => self.find_typedef(&self.types[*idx]), - idb_rs::til::TyperefValue::UnsolvedName(name) => self - .find_typedef_by_name(name.as_ref().map(|x| x.as_bytes()).unwrap_or(&[])) - .unwrap_or_else(|| { - TranslateTypeResult::Error(BnTypeError::NameNotFound( - name.as_ref() - .map(|x| x.as_utf8_lossy().to_string()) - .unwrap_or(String::new()), - )) - }), - idb_rs::til::TyperefValue::UnsolvedOrd(ord) => { - TranslateTypeResult::Error(BnTypeError::OrdinalNotFound(*ord)) - } - }, - - TILTypeVariant::Pointer(ty) => self.translate_pointer(ty), - TILTypeVariant::Function(fun) => self.translate_function(fun), - - // can only be partially solved if all fields are solved or partially solved - TILTypeVariant::Array(array) => self.translate_array(array), - TILTypeVariant::Struct(ty_struct) => self.translate_struct(ty_struct), - TILTypeVariant::Union(ty_union) => self.translate_union(ty_union), - } - } -} - -pub fn translate_ephemeral_type(debug_file: &BinaryView, ty: &TILType) -> TranslateTypeResult { - // in case we need to translate types - let header = idb_rs::til::ephemeral_til_header(); - let translator = TranslateIDBTypes { - arch: debug_file.default_arch().unwrap(/* TODO */), - progress: |_, _| Ok(()), - // TODO it's unclear what to do here - til: &TILSection { - header, - symbols: vec![], - types: vec![], - macros: None, - }, - types: vec![], - }; - - translator.translate_type(ty) -} - -pub fn translate_til_types( - arch: CoreArchitecture, - til: &TILSection, - progress: impl Fn(usize, usize) -> Result<(), ()>, -) -> Result>> { - let total = til.symbols.len() + til.types.len(); - let mut types = Vec::with_capacity(total); - let mut types_by_ord = HashMap::with_capacity(total); - let mut types_by_name = HashMap::with_capacity(total); - let all_types = til.types.iter().zip(core::iter::repeat(false)); - // TODO: it's unclear how the demangle symbols and types names/ord, for now only parse types - //let all_types = all_types.chain(til.symbols.iter().zip(core::iter::repeat(true))); - for (i, (ty, is_symbol)) in all_types.enumerate() { - // TODO sanitized the input - // TODO find out how the namespaces used by TIL works - types.push(TranslatesIDBType { - name: ty.name.clone(), - is_symbol, - og_ty: ty, - ty: TranslateTypeResult::NotYet, - }); - if ty.ordinal != 0 && !is_symbol { - let dup1 = types_by_ord.insert(ty.ordinal, i); - if let Some(old) = dup1 { - let old_type = &types[old]; - let new_type = types.last().unwrap(); - // TODO error? - panic!( - "dup ord {}:{} {}:\n{:?}\n{:?}", - old_type.is_symbol, - new_type.is_symbol, - ty.ordinal, - &old_type.og_ty, - &new_type.og_ty, - ) - } - } - if !ty.name.as_bytes().is_empty() { - let dup2 = types_by_name.insert(ty.name.as_bytes().to_owned(), i); - if let Some(old) = dup2 { - let old_type = &types[old]; - let new_type = types.last().unwrap(); - // TODO error? - panic!( - "dup name {}:{}: {}:\n{:?}\n{:?}", - old_type.is_symbol, - new_type.is_symbol, - &ty.name.as_utf8_lossy(), - &old_type.og_ty, - &new_type.og_ty, - ) - } - } - } - - let mut translator = TranslateIDBTypes { - arch, - progress, - til, - types, - }; - if (translator.progress)(0, total).is_err() { - return Err(anyhow!("IDB import aborted")); - } - - // solve types until there is nothing else that can be solved - loop { - // if something was solved, mark this variable as true - let mut did_something = false; - let mut num_translated = 0usize; - for i in 0..translator.types.len() { - match &translator.types[i].ty { - TranslateTypeResult::NotYet => { - let result = translator.translate_type(&translator.types[i].og_ty.tinfo); - did_something |= !matches!(&result, TranslateTypeResult::NotYet); - translator.types[i].ty = result; - } - TranslateTypeResult::PartiallyTranslated(_, None) => { - let result = translator.translate_type(&translator.types[i].og_ty.tinfo); - // don't allow regress, it can goes from PartiallyTranslated to any state other then NotYet - assert!(!matches!(&result, TranslateTypeResult::NotYet)); - did_something |= - !matches!(&result, TranslateTypeResult::PartiallyTranslated(_, None)); - translator.types[i].ty = result; - // don't need to add again they will be fixed on the loop below - } - // if an error was produced, there is no point in try again - TranslateTypeResult::PartiallyTranslated(_, Some(_)) => {} - // NOTE for now we are just accumulating errors, just try to translate the max number - // of types as possible - TranslateTypeResult::Error(_) => {} - // already translated, nothing do to here - TranslateTypeResult::Translated(_) => {} - } - - // count the number of finished types - if let TranslateTypeResult::Translated(_) = &translator.types[i].ty { - num_translated += 1 - } - } - - if !did_something { - // means we acomplilshed nothing during this loop, there is no point in trying again - break; - } - if (translator.progress)(num_translated, total).is_err() { - // error means the user aborted the progress - break; - } - } - - Ok(translator.types) -} - -fn field_from_bytes(bytes: i32) -> Ref { - match bytes { - 0 => unreachable!(), - num @ (1 | 2 | 4 | 8 | 16) => Type::int(num.try_into().unwrap(), false), - nelem => Type::array(&Type::char(), nelem.try_into().unwrap()), - } -} - -fn convert_cc( - arch: &A, - in_cc: TILCallingConvention, -) -> Option> { - match in_cc { - TILCallingConvention::Cdecl => arch.get_cdecl_calling_convention(), - TILCallingConvention::Ellipsis => arch.get_cdecl_calling_convention(), - TILCallingConvention::Stdcall => arch.get_stdcall_calling_convention(), - TILCallingConvention::Fastcall => arch.get_fastcall_calling_convention(), - TILCallingConvention::Voidarg - | TILCallingConvention::Pascal - | TILCallingConvention::Thiscall - | TILCallingConvention::Swift - | TILCallingConvention::Golang - | TILCallingConvention::Reserved3 - | TILCallingConvention::Uservars - | TILCallingConvention::Userpurge - | TILCallingConvention::Usercall => None, - } -} From d69d3c791d8de61d85e4cd3bb22042e1224c87fa Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:38:30 -0500 Subject: [PATCH 03/25] [idb_import] Improve TIL type translation fidelity Resolve the outstanding translation TODOs in the TIL translator: - Size the variable-width C basic types (bool/short/int/long/long long/ long double) from the TIL header's compiler sizing info when a TIL is attached, falling back to the standard C ABI defaults. Both build_basic_ty and width_of_type now share these sizes so referenced-type placeholder widths stay in step with the types they stand in for. - Translate BoolSized to a real width: a 1-byte bool stays bool, any other width becomes an unsigned int of that size (BN bool is always one byte). - Honor pointer __ptr32 / __ptr64 modifiers to override the platform address size, and document that based/shifted pointers have no BN representation. - Detect variadic functions via the ellipsis calling convention instead of hardcoding has_variable_args to false. - Add udt extra_padding to the computed structure width so fixed-size UDTs occupy their true storage size. - Document the resolved design decisions for grouped (bitmask) enums, flexible array members, struct/union placeholder widths, the function return location, and the authoritative pointer address size. --- plugins/idb_import/src/translate.rs | 144 ++++++++++++++++++++++------ 1 file changed, 116 insertions(+), 28 deletions(-) diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index c3af6fa697..4eb4b81d64 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -12,6 +12,7 @@ use binaryninja::types::{ TypeContainer, }; use idb_rs::til::function::CallingConvention; +use idb_rs::til::pointer::PointerModifier; use idb_rs::til::r#enum::EnumMembers; use idb_rs::til::{Basic, TILTypeInfo, TypeVariant, TyperefType, TyperefValue}; use std::collections::{HashMap, HashSet}; @@ -64,6 +65,18 @@ pub struct TILTranslator { pub address_size: usize, /// Default size of enumerations. pub enum_size: usize, + /// Default size of a `bool` in bytes. + pub bool_size: usize, + /// Default size of a `short` in bytes. + pub short_size: usize, + /// Default size of an `int` in bytes. + pub int_size: usize, + /// Default size of a `long` in bytes. + pub long_size: usize, + /// Default size of a `long long` in bytes. + pub long_long_size: usize, + /// Default size of a `long double` in bytes. + pub long_double_size: usize, /// Reference types, for use with typedefs. /// /// This is necessary because ordinals do not have names and can't be made into a [`NamedTypeReference`]. @@ -88,6 +101,12 @@ impl TILTranslator { Self { address_size, enum_size: address_size / 2, + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -102,6 +121,12 @@ impl TILTranslator { Self { address_size: platform.address_size(), enum_size: platform.arch().default_integer_size(), + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -116,6 +141,12 @@ impl TILTranslator { Self { address_size: arch.address_size(), enum_size: arch.default_integer_size(), + bool_size: 1, + short_size: 2, + int_size: 4, + long_size: 4, + long_long_size: 8, + long_double_size: 8, reference_types_by_ord: HashMap::new(), reference_types_by_name: HashMap::new(), used_types: Rc::new(Mutex::new(HashSet::new())), @@ -131,13 +162,28 @@ impl TILTranslator { self.enum_size = size_enum.get() as usize; } + // The TIL header carries the C basic-type sizing for the compiler the IDB was built + // for; prefer it over our architecture-derived defaults so types translate with the + // same widths IDA used. + self.bool_size = til.header.size_bool.get() as usize; + self.int_size = til.header.size_int.get() as usize; + self.short_size = til.sizeof_short().get() as usize; + self.long_size = til.sizeof_long().get() as usize; + self.long_long_size = til.sizeof_long_long().get() as usize; + if let Some(size_long_double) = til.header.size_long_double { + self.long_double_size = size_long_double.get() as usize; + } + // Add referencable types so that type def lookups can occur. self.reference_types_by_ord.reserve(til.types.len()); for (_idx, ty) in til.types.iter().enumerate() { self.add_referenced_type_info(ty); } - // TODO: Handle address (pointer) size information? + // NOTE: The TIL exposes `addr_size()`, but it is derived from the compiler model and + // defaults to 4 when that is absent, which would silently truncate pointers on a + // 64-bit binary. The platform/architecture `address_size` we were constructed with is + // authoritative for pointer widths, so we intentionally keep it. self } @@ -192,7 +238,8 @@ impl TILTranslator { pub fn build_basic_ty(&self, basic_ty: &idb_rs::til::Basic) -> anyhow::Result { use idb_rs::til::Basic; - // TODO: Grab the sizing information of these types from the TIL instead of hardcoding. + // The variable-width C types (short/int/long/...) are sized from the TIL header when + // one is available (see `with_til_info`), falling back to the standard C ABI defaults. match basic_ty { Basic::Void => Ok(TypeBuilder::void()), Basic::Unknown { bytes } => { @@ -200,17 +247,26 @@ impl TILTranslator { // so we are going to be liberal and allow unknown basic types to be treated as a sized int. Ok(TypeBuilder::int(*bytes as usize, false)) } - Basic::Bool => Ok(TypeBuilder::bool()), - Basic::BoolSized { .. } => { - // TODO: This needs to be resized, if that cannot be done, make a NTR to an int named BOOL? - Ok(TypeBuilder::bool()) - } + Basic::Bool if self.bool_size == 1 => Ok(TypeBuilder::bool()), + // Binary Ninja's `bool` is always a single byte; a wider `bool` is represented as an + // unsigned integer of the requested size. + Basic::Bool => Ok(TypeBuilder::int(self.bool_size, false)), + Basic::BoolSized { bytes } if bytes.get() == 1 => Ok(TypeBuilder::bool()), + Basic::BoolSized { bytes } => Ok(TypeBuilder::int(bytes.get() as usize, false)), Basic::Char => Ok(TypeBuilder::char()), Basic::SegReg => Err(anyhow::anyhow!("SegReg is not supported")), - Basic::Short { is_signed } => Ok(TypeBuilder::int(2, is_signed.unwrap_or(true))), - Basic::Long { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), - Basic::LongLong { is_signed } => Ok(TypeBuilder::int(8, is_signed.unwrap_or(true))), - Basic::Int { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), + Basic::Short { is_signed } => { + Ok(TypeBuilder::int(self.short_size, is_signed.unwrap_or(true))) + } + Basic::Long { is_signed } => { + Ok(TypeBuilder::int(self.long_size, is_signed.unwrap_or(true))) + } + Basic::LongLong { is_signed } => { + Ok(TypeBuilder::int(self.long_long_size, is_signed.unwrap_or(true))) + } + Basic::Int { is_signed } => { + Ok(TypeBuilder::int(self.int_size, is_signed.unwrap_or(true))) + } Basic::IntSized { bytes, is_signed } => { let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); Ok(TypeBuilder::int(bytes as usize, is_signed.unwrap_or(true))) @@ -219,7 +275,7 @@ impl TILTranslator { let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); Ok(TypeBuilder::float(bytes as usize)) } - Basic::LongDouble => Ok(TypeBuilder::float(8)), + Basic::LongDouble => Ok(TypeBuilder::float(self.long_double_size)), } } @@ -227,11 +283,21 @@ impl TILTranslator { &self, pointer_ty: &idb_rs::til::pointer::Pointer, ) -> anyhow::Result { - // TODO: Consult pointer_ty.closure (is this how we can get based pointers?) + // A `__ptr32` / `__ptr64` modifier overrides the platform address size for this pointer + // (e.g. a 32-bit pointer embedded in an otherwise 64-bit binary). + // + // NOTE: `closure` (based pointers) and `shifted` pointers have no direct Binary Ninja + // representation, so the pointee type and width are preserved but those attributes are + // intentionally dropped. + let pointer_width = match pointer_ty.modifier { + Some(PointerModifier::Ptr32) => 4, + Some(PointerModifier::Ptr64) => 8, + Some(PointerModifier::Restricted) | None => self.address_size, + }; let inner_ty = self.translate_type_info(&pointer_ty.typ)?; Ok(TypeBuilder::pointer_of_width( &inner_ty, - self.address_size, + pointer_width, // NOTE: Set later in `translate_type_info`. false, // NOTE: Set later in `translate_type_info`. @@ -244,10 +310,16 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // TODO: Once branch `test_call_layout` lands use function_ty.retloc to recover return location. + // NOTE: `function_ty.retloc` carries the explicit return-value location, but mapping an + // IDA `ArgLoc` onto a Binary Ninja variable storage location is not yet implemented, so + // the return location is left to be derived by analysis. let return_ty = self.translate_type_info(&function_ty.ret)?; let params: Vec = self.build_function_params(&function_ty.args)?; - let has_variable_args = false; + // An ellipsis calling convention is IDA's marker for a variadic function. + let has_variable_args = matches!( + function_ty.calling_convention, + Some(CallingConvention::Ellipsis) + ); let stack_adjust = Conf::new(0, 0); let builder = match function_ty.calling_convention { @@ -403,8 +475,10 @@ impl TILTranslator { builder.insert_member(member, false); } + // `extra_padding` records trailing padding bytes IDA stores for fixed-size UDTs; add it + // to the member-derived width so the structure occupies its true storage size. + let width = width + udt_ty.extra_padding.unwrap_or(0); builder.width(width); - // TODO: Handle udt_ty.extra_padding (is that tail padding?) Ok(TypeBuilder::structure(&builder.finalize())) } @@ -478,7 +552,10 @@ impl TILTranslator { } EnumMembers::Groups(groups) => { for (idx, group) in groups.iter().enumerate() { - // TODO: How does this grouping actually impact the enum besides the name? + // IDA's grouped (bitmask) enums partition the members into named bitmask + // groups. Binary Ninja enumerations are flat and have no equivalent grouping + // concept, so we flatten the groups, qualifying each member with its group + // name to keep the names unique and preserve the original grouping intent. let group_name = group .field .name @@ -526,26 +603,31 @@ impl TILTranslator { /// Computes the width of a type, in bytes. pub fn width_of_type(&self, ty: &idb_rs::til::Type) -> anyhow::Result { match &ty.type_variant { + // Keep these widths in lockstep with `build_basic_ty` so that NTR placeholder widths + // match the types they stand in for. TypeVariant::Basic(basic) => match basic { Basic::Void => Ok(0), Basic::Unknown { bytes } => Ok(*bytes as usize), - Basic::Bool => Ok(1), + Basic::Bool => Ok(self.bool_size), Basic::BoolSized { bytes } => Ok(bytes.get() as usize), Basic::Char => Ok(1), Basic::SegReg => Ok(8), - Basic::Short { .. } => Ok(2), - Basic::Long { .. } => Ok(4), - Basic::LongLong { .. } => Ok(8), - Basic::Int { .. } => Ok(4), + Basic::Short { .. } => Ok(self.short_size), + Basic::Long { .. } => Ok(self.long_size), + Basic::LongLong { .. } => Ok(self.long_long_size), + Basic::Int { .. } => Ok(self.int_size), Basic::IntSized { bytes, .. } => Ok(bytes.get() as usize), Basic::Float { bytes } => Ok(bytes.get() as usize), - Basic::LongDouble => Ok(8), + Basic::LongDouble => Ok(self.long_double_size), }, TypeVariant::Pointer(_) => Ok(self.address_size), TypeVariant::Function(_) => Err(anyhow::anyhow!("Function types do not have a width")), TypeVariant::Array(arr) => { let elem_width = self.width_of_type(&arr.elem_type)?; - // TODO: A DST array is unsized or what? I think we should error IMO. + // A flexible array member (no element count) contributes no storage of its own; + // it only names the tail of the containing structure. Treat it as zero-width to + // mirror `build_array_ty`, rather than erroring, so structures ending in one can + // still be sized. let count = arr.nelem.map(|n| n.get()).unwrap_or(0); Ok(elem_width * count as usize) } @@ -565,17 +647,23 @@ impl TILTranslator { for member in &s.members { total_width += self.width_of_type(&member.member_type)?; } - // TODO: Handle alignment and bitfields. + // NOTE: This is the tightly-packed lower bound on the size. It does not + // reintroduce the inter-member alignment padding (or bitfield storage sharing) + // that `build_udt_members` accounts for, because that requires per-type alignment + // which an unresolved type reference does not expose here. It is only used to give + // a referenced struct a placeholder width, so an underestimate is acceptable; the + // authoritative layout comes from `build_udt_ty` once the type is fully resolved. Ok(total_width) } TypeVariant::Union(u) => { - // Size of the largest member + alignment + // Size of the largest member. As with the struct case above, the union's own + // alignment padding is not applied here; this is the placeholder lower bound used + // for type-reference widths, not the final laid-out size. let mut max_width = 0; for member in &u.members { let member_width = self.width_of_type(&member.member_type)?; max_width = max_width.max(member_width); } - // TODO: Handle alignment Ok(max_width) } TypeVariant::Enum(e) => Ok(e From e9dbe80b856b41da7ca37eef013662f6c12f5c1c Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:40:20 -0500 Subject: [PATCH 04/25] [idb_import] Resolve parse.rs TODOs - merged_types: carry an ordinal across the dedup when the kept entry lacks one, keeping name/ordinal lookups resolvable, and document that dir_tree types are clones of the same TIL definitions so no body merge is needed. - TIL decompression: read_til already inflates Zlib/Zstd sections via its section header, so document that and drop the stale "decompress til" TODO. - Function registers/stack variables: replace the dead exploratory block with a note scoping it as a follow-up feature (needs FunctionInfo and mapper support to apply named stack variables and register names). --- plugins/idb_import/src/parse.rs | 37 ++++++++++++++------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 95a2fdd0eb..0e68ff5f3d 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -188,11 +188,17 @@ impl IDBInfo { types.extend(til.types.clone()); } types.sort_by_key(|t| t.name.to_string()); + // `a` is the later (dir_tree-then-til) duplicate being removed and `b` is the one we + // keep. In practice the dir_tree types are clones pulled from the same TIL (see + // `parse_dir_tree`), so the definitions are identical; we still carry over an ordinal if + // the kept entry happens to be missing one, so name/ordinal lookups stay resolvable. types.dedup_by(|a, b| { if a.name.to_string() != b.name.to_string() { return false; } - // TODO: Merge types instead of just picking b and not transferring. + if b.ordinal == 0 && a.ordinal != 0 { + b.ordinal = a.ordinal; + } true }); types @@ -261,7 +267,8 @@ impl IDBFileParser { id2 = Some(format.read_id2(&mut *data, id2_loc)?); } - // TODO: Decompress til + // NOTE: `read_til` reads the section header and transparently inflates Zlib/Zstd + // compressed TIL sections, so no explicit decompression step is needed here. let mut til = None; if let Some(til_loc) = format.til_location() { til = Some(format.read_til(&mut *data, til_loc)?); @@ -332,25 +339,13 @@ impl IDBFileParser { continue; } - // TODO: Parse function registers and params - // for def_reg in id0.function_defined_registers(netdelta, &func, &func_ext) { - // tracing::info!("{:0x} : Function register: {:?}", func_start, def_reg); - // let Ok(_def_reg) = def_reg else { - // tracing::warn!("Failed to read function register entry"); - // continue; - // }; - // } - - // if let Ok(stack_names) = - // id0.function_defined_variables(&root_info, &func, &func_ext) - // { - // tracing::info!( - // "{:0x} : Function stack variables: {:#?}", - // func_start, - // stack_names - // ); - // } - + // NOTE: `id0.function_defined_registers(netdelta, &func, &func_ext)` and + // `id0.function_defined_variables(&root_info, &func, &func_ext)` expose the + // function's register definitions and named stack variables. Surfacing + // those requires extending `FunctionInfo` to carry them and teaching the + // mapper how to apply named stack variables / register names to a BN + // function, which depends on the analysed stack frame. That is tracked as a + // follow-up feature rather than parsed here. functions.push(FunctionInfo { name: None, ty: None, From ed2d9323e1052bf6c56dd9ef3eeb2cfd120335b5 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:44:01 -0500 Subject: [PATCH 05/25] [idb_import] Resolve mapper/commands TODOs and surface IDB SHA256 - Populate IDBInfo.sha256 from the input file SHA256 recorded in the IDB so it is no longer always None, and drop the stale placeholder comment. - Mapper logs the recorded SHA256 and documents a future IDB verifier that would compare it against the mapped view before applying data. - Define the fallback `size_t` only when the view lacks one, so a real platform/view definition is never clobbered. - Document that the undo bracketing requires the mapper to be the sole writer, an invariant the run-once loader activity already guarantees. - Document the name-based (not range-based) section dedup rationale: the BN loader already maps the address space, so a range check would suppress every IDA segment. - Replace the remaining design-question TODOs (used-type ordering, attached TIL lookup, per-function platform tuple, OpenFileName filter naming) with decisions/notes explaining the current behavior and future direction. --- plugins/idb_import/src/commands.rs | 3 +- plugins/idb_import/src/mapper.rs | 50 ++++++++++++++++++++---------- plugins/idb_import/src/parse.rs | 12 +++++-- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/plugins/idb_import/src/commands.rs b/plugins/idb_import/src/commands.rs index bc17aa9b4a..f617746386 100644 --- a/plugins/idb_import/src/commands.rs +++ b/plugins/idb_import/src/commands.rs @@ -27,7 +27,8 @@ impl LoadFileField { pub fn field(&self) -> FormInputField { FormInputField::OpenFileName { prompt: "File Path".to_string(), - // TODO: This is called extension but is really a filter. + // NOTE: Binary Ninja's `OpenFileName` field names this `extension`, but it accepts a + // full file filter expression (e.g. "*.idb;*.i64"), which is what we pass here. extension: Some(self.filter.clone()), default: self.default.clone(), value: None, diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 593fdb75cf..0c7428f81f 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -34,9 +34,13 @@ impl IDBMapper { return; }; - // TODO: Actually the below comment belongs in an IDBVerifier that tries to determine if the idb - // TODO: Will process correctly for the given view. - // TODO: Have a shasum check of the file to make sure we are not mapping to bad data? + // NOTE: `self.info.sha256` carries the SHA256 of the original input file as recorded in + // the IDB. A dedicated IDB verifier that compares this against the file backing `view` + // and refuses to map on mismatch (to avoid applying stale data to the wrong binary) is + // left as future work; for now we log it for traceability and proceed with mapping. + if let Some(sha256) = &self.info.sha256 { + tracing::debug!("IDB recorded input file SHA256: {}", sha256); + } // Rebase the address from ida -> binja without this rebased views will fail to map. let bn_base_address = view.start(); @@ -78,12 +82,16 @@ impl IDBMapper { // Create the type translator, adding all referencable types from both the TIL and the binary view. let platform = view.default_platform().unwrap(); - // TODO: Need to remove this, but for now keeping this since it gets referenced a lot. - view.define_auto_type( - "size_t", - "IDA", - &Type::int(platform.arch().default_integer_size(), false), - ); + // Provide a fallback `size_t` only when the view does not already define one. Many IDA + // types reference `size_t`, so we keep this safety net, but defining it conditionally + // avoids clobbering a real platform/view definition with our generic integer. + if view.type_by_name("size_t").is_none() { + view.define_auto_type( + "size_t", + "IDA", + &Type::int(platform.arch().default_integer_size(), false), + ); + } let til_translator = TILTranslator::new_from_platform(&platform).with_type_container(&view.type_container()); let til_translator = match &self.info.til { @@ -105,8 +113,10 @@ impl IDBMapper { self.map_export_to_view(view, &til_translator, &rebased_export); } - // TODO: The below undo and ignore is not thread safe, this means that the mapper itself - // TODO: should be the only thing running at the time of the mapping process. + // NOTE: The undo bracketing below is not thread safe, so the mapper must be the only + // writer mutating the view while it runs. This invariant holds because mapping is + // registered as a single run-once analysis activity (see `plugin_init`) rather than being + // invoked concurrently. let undo = view.file().begin_undo_actions(true); for comment in &self.info.merged_comments() { let mut rebased_comment = comment.clone(); @@ -175,8 +185,10 @@ impl IDBMapper { if let Ok(_used_types) = til_translator.used_types.lock() { used_types = _used_types.clone(); } - // TODO: Adding types to view after the types have been applied to the functions is not a - // TODO: great idea, I imagine the NTR's will have stale references until the analysis runs again. + // NOTE: Types are resolved here after they have already been applied to functions, so any + // named type references that get defined in this pass will read as stale until analysis + // re-runs and re-resolves them. Acceptable for this best-effort backfill, but a reason to + // resolve referenced types before applying function types if this path is wired in. 'found: for used_ty in &used_types { // 0. Make sure the type doesn't already exist in the view if view.type_by_name(&used_ty.name).is_some() { @@ -210,8 +222,9 @@ impl IDBMapper { } } + // NOTE: A further lookup source would be the TILs attached to the IDB (e.g. imported + // library TILs) before giving up; not yet wired in. tracing::warn!("Failed to find type: {:?}", used_ty); - // 4. TODO: Look through the idb attached tils? } } @@ -243,7 +256,10 @@ impl IDBMapper { SegmentType::Imem => Semantics::DefaultSection, }; - // TODO: Is this section already mapped using address range not name. + // NOTE: We dedup on section name rather than address range on purpose. The BN loader has + // usually already mapped the whole address space, so an IDA segment would always overlap + // an existing section and a range-based check would suppress every segment. Name-based + // dedup only skips re-adding a section we (or the loader) already named identically. if view.section_by_name(&segment.name).is_some() { tracing::debug!( "Section with name '{}' already exists, skipping...", @@ -344,7 +360,9 @@ impl IDBMapper { } } - // TODO: Attach a platform tuple to the FunctionInfo? + // NOTE: We let the function inherit the view's default platform. Carrying an explicit + // platform on FunctionInfo would let mixed-architecture databases (e.g. ARM/Thumb + // interworking) map each function with its own platform; that is a future enhancement. if let Some(func_sym) = symbol_from_func(func) { tracing::debug!( "Mapping function symbol: {:0x} => {}", diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 0e68ff5f3d..20d17ef6da 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -279,10 +279,18 @@ impl IDBFileParser { _ => None, }; + // The IDB records the SHA256 of the original input file; surface it so consumers (and a + // future verifier) can confirm the IDB matches the binary being mapped. + let sha256 = id0.as_ref().and_then(|id0| { + let root_idx = id0.root_node().ok()?; + let hash = id0.input_file_sha256(root_idx).ok()??; + Some(hash.iter().map(|b| format!("{:02x}", b)).collect::()) + }); + let id0_info = id0.as_ref().map(|id0| self.parse_id0(id0)).transpose()?; Ok(IDBInfo { - sha256: None, + sha256, id0: id0_info, til, dir_tree: dir_tree_info, @@ -447,8 +455,6 @@ impl IDBFileParser { let root_info = id0.ida_info(root_info_idx)?; let netdelta = root_info.netdelta(); - // sha256 - let func_info_from_addr = |addr_info: &AddressInfo| -> anyhow::Result> { let func_name = addr_info.label()?.map(|s| s.to_string()); From 5782c439c332976ff60b1d8bb0b5bd5ff1fb830c Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:51:10 -0500 Subject: [PATCH 06/25] [idb_import] Verify IDB matches the loaded binary by SHA256 The IDB records the SHA256 of its original input file. Walk to the root of the view's parent chain (the raw view, whose bytes are that on-disk file), hash it in 1 MiB chunks, and compare against the recorded hash. On mismatch we warn that the imported data may not correspond to the binary; on match we log the verification at debug level. --- Cargo.lock | 66 ++++++++++++++++++++++++++++++++ plugins/idb_import/Cargo.toml | 3 +- plugins/idb_import/src/mapper.rs | 62 +++++++++++++++++++++++++++--- 3 files changed, 124 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index efe2bfe067..47af8f9e20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -271,6 +271,15 @@ dependencies = [ "serde", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.6.2" @@ -581,6 +590,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -654,6 +672,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctrlc" version = "3.5.1" @@ -759,6 +787,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2ea6706d74fca54e15f1d40b5cf7fe7f764aaec61352a9fcec58fe27e042fc8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "directories" version = "6.0.0" @@ -1093,6 +1131,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "gethostname" version = "0.4.3" @@ -1291,6 +1339,7 @@ dependencies = [ "idb-rs", "serde", "serde_json", + "sha2", "tracing", ] @@ -2585,6 +2634,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2972,6 +3032,12 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/plugins/idb_import/Cargo.toml b/plugins/idb_import/Cargo.toml index ce94b6f67e..191f123e53 100644 --- a/plugins/idb_import/Cargo.toml +++ b/plugins/idb_import/Cargo.toml @@ -16,4 +16,5 @@ binaryninjacore-sys.workspace = true idb-rs = { git = "https://github.com/Vector35/idb-rs", tag = "0.1.13" } tracing = "0.1" serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" \ No newline at end of file +serde_json = "1.0" +sha2 = "0.10" \ No newline at end of file diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 0c7428f81f..b6be5d6997 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -14,6 +14,7 @@ use binaryninja::symbol::{Binding, Symbol, SymbolType}; use binaryninja::types::Type; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; +use sha2::{Digest, Sha256}; use std::collections::HashSet; /// Maps IDB data into a [`BinaryView`]. @@ -34,12 +35,26 @@ impl IDBMapper { return; }; - // NOTE: `self.info.sha256` carries the SHA256 of the original input file as recorded in - // the IDB. A dedicated IDB verifier that compares this against the file backing `view` - // and refuses to map on mismatch (to avoid applying stale data to the wrong binary) is - // left as future work; for now we log it for traceability and proceed with mapping. - if let Some(sha256) = &self.info.sha256 { - tracing::debug!("IDB recorded input file SHA256: {}", sha256); + // Verify the IDB was built from the binary we are about to map into. The IDB records the + // SHA256 of its original input file; the root of the view's parent chain is the raw view, + // whose bytes are that same on-disk file, so we can hash it and compare. + if let Some(expected_sha256) = &self.info.sha256 { + match sha256_of_raw_view(view) { + Some(actual_sha256) if actual_sha256 == *expected_sha256 => { + tracing::debug!("Verified IDB matches loaded binary (SHA256 {expected_sha256})"); + } + Some(actual_sha256) => { + tracing::warn!( + "IDB input file SHA256 ({expected_sha256}) does not match the loaded \ + binary ({actual_sha256}); imported data may not correspond to this binary." + ); + } + None => { + tracing::debug!( + "Could not hash the loaded binary to verify against IDB SHA256 {expected_sha256}" + ); + } + } } // Rebase the address from ida -> binja without this rebased views will fail to map. @@ -459,6 +474,41 @@ impl IDBMapper { } } +/// Compute the SHA256 of the raw (on-disk) bytes backing `view`. +/// +/// Walks to the root of the parent-view chain, which is the raw view whose contents are the +/// original input file, then hashes its full contents. Returns `None` if the view is empty. +fn sha256_of_raw_view(view: &BinaryView) -> Option { + let mut raw_view = view.to_owned(); + while let Some(parent) = raw_view.parent_view() { + raw_view = parent; + } + + let length = raw_view.len(); + if length == 0 { + return None; + } + + // Hash in chunks so we never hold the whole binary in memory at once. + const CHUNK_SIZE: usize = 1 << 20; + let mut hasher = Sha256::new(); + let mut offset = raw_view.start(); + let end = offset + length; + while offset < end { + let want = CHUNK_SIZE.min((end - offset) as usize); + let chunk = raw_view.read_vec(offset, want); + if chunk.is_empty() { + // The view reported a length we cannot fully read; treat as unverifiable. + return None; + } + hasher.update(&chunk); + offset += chunk.len() as u64; + } + + let digest = hasher.finalize(); + Some(digest.iter().map(|b| format!("{:02x}", b)).collect()) +} + fn symbol_from_func(func: &FunctionInfo) -> Option> { let Some(func_name) = &func.name else { return None; From cd7160362772139a689ea15db15e606e22655f90 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 01:58:37 -0500 Subject: [PATCH 07/25] [idb_import] Recover argument locations and register variables - Argument locations: translate IDA stack-passed argument locations (ArgLoc::Stack) into Binary Ninja parameter stack locations so explicit stack parameter placement is preserved. Register-encoded locations carry raw IDA register indices with no portable BN mapping and are left for analysis to derive. - Register variables: parse IDA "regvars" (a register renamed by the user within a function) into FunctionInfo, carrying them through the function merge, and apply them in the mapper by resolving the register by name and creating a user variable typed to the register width. --- plugins/idb_import/src/mapper.rs | 42 +++++++++++++++++- plugins/idb_import/src/parse.rs | 52 ++++++++++++++++++---- plugins/idb_import/src/translate.rs | 69 ++++++++++++++++------------- 3 files changed, 124 insertions(+), 39 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index b6be5d6997..dee9b9b562 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -5,13 +5,14 @@ use crate::parse::{ SegmentInfo, }; use crate::translate::TILTranslator; -use binaryninja::architecture::Architecture; +use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; use binaryninja::symbol::{Binding, Symbol, SymbolType}; use binaryninja::types::Type; +use binaryninja::variable::Variable; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; use sha2::{Digest, Sha256}; @@ -321,6 +322,7 @@ impl IDBMapper { address: export.address, is_library: false, is_no_return: false, + register_vars: Vec::new(), }; self.map_func_to_view(view, til_translator, &func_info); } else { @@ -386,6 +388,44 @@ impl IDBMapper { ); view.define_auto_symbol(&func_sym); } + + self.map_register_vars_to_func(&bn_func, func); + } + + /// Apply IDA register variables ("regvars") to a function. + /// + /// IDA records that a register holds a user-named value over an address range. Binary Ninja + /// variables are function-scoped, so the range is not modeled; we name the architecture + /// register over the whole function, typed as an unsigned int of the register's width. + fn map_register_vars_to_func( + &self, + bn_func: &binaryninja::function::Function, + func: &FunctionInfo, + ) { + if func.register_vars.is_empty() { + return; + } + let arch = bn_func.arch(); + for reg_var in &func.register_vars { + let Some(register) = arch.register_by_name(®_var.register) else { + tracing::warn!( + "Skipping register variable '{}': unknown register '{}' at {:0x}", + reg_var.name, + reg_var.register, + func.address + ); + continue; + }; + let reg_width = register.info().size().max(1); + let var = Variable::from_register(register); + tracing::debug!( + "Mapping register variable {} => {} at {:0x}", + reg_var.register, + reg_var.name, + func.address + ); + bn_func.create_user_var(&var, &Type::int(reg_width, false), ®_var.name, false); + } } pub fn map_label_to_view(&self, view: &BinaryView, label: &LabelInfo) { diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 20d17ef6da..157ec5f230 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -27,6 +27,19 @@ pub struct FunctionInfo { pub address: u64, pub is_library: bool, pub is_no_return: bool, + pub register_vars: Vec, +} + +/// A register renamed by the user within a function (IDA "regvar"), e.g. `eax` -> `count`. +#[derive(Debug, Clone, Serialize)] +pub struct RegisterVarInfo { + /// Architecture register the variable lives in (e.g. `eax`). + pub register: String, + /// User-assigned name for the register over its range. + pub name: String, + pub start: u64, + pub end: u64, + pub comment: String, } #[derive(Debug, Clone, Serialize)] @@ -134,6 +147,9 @@ impl IDBInfo { if a.ty.is_some() { b.ty = a.ty.clone(); } + if !a.register_vars.is_empty() { + b.register_vars = a.register_vars.clone(); + } true }); id0_functions @@ -341,25 +357,44 @@ impl IDBFileParser { IDBFunctionType::Tail(_) => { tracing::debug!("Skipping tail function... {:0x}", func_start); } - IDBFunctionType::NonTail(_func_ext) => { + IDBFunctionType::NonTail(func_ext) => { if func.flags.is_outline() { tracing::debug!("Skipping outlined function... {:0x}", func_start); continue; } - // NOTE: `id0.function_defined_registers(netdelta, &func, &func_ext)` and - // `id0.function_defined_variables(&root_info, &func, &func_ext)` expose the - // function's register definitions and named stack variables. Surfacing - // those requires extending `FunctionInfo` to carry them and teaching the - // mapper how to apply named stack variables / register names to a BN - // function, which depends on the analysed stack frame. That is tracked as a - // follow-up feature rather than parsed here. + // Collect register variables (IDA "regvars"): registers the user renamed + // over a range within the function. The register is given by name, so it + // maps cleanly onto a Binary Ninja register in the mapper. + let mut register_vars = Vec::new(); + for reg in id0.function_defined_registers(netdelta, &func, func_ext) { + let reg = match reg { + Ok(reg) => reg, + Err(err) => { + tracing::warn!( + "Failed to read register variable for {:0x}: {}", + func_start, + err + ); + continue; + } + }; + register_vars.push(RegisterVarInfo { + register: reg.register_name.to_string(), + name: reg.variable_name.to_string(), + start: reg.range.start.into_raw().into_u64(), + end: reg.range.end.into_raw().into_u64(), + comment: reg.cmt.to_string(), + }); + } + functions.push(FunctionInfo { name: None, ty: None, address: func_start, is_library: func.flags.is_lib(), is_no_return: func.flags.is_no_return(), + register_vars, }); } } @@ -466,6 +501,7 @@ impl IDBFileParser { address: func_addr, is_library: false, is_no_return: false, + register_vars: Vec::new(), })) }; diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index 4eb4b81d64..ee57bc084c 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -9,9 +9,10 @@ use binaryninja::rc::Ref; use binaryninja::types::{ EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, NamedTypeReference, NamedTypeReferenceClass, StructureBuilder, StructureMember, StructureType, TypeBuilder, - TypeContainer, + TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, }; -use idb_rs::til::function::CallingConvention; +use binaryninja::variable::Variable; +use idb_rs::til::function::{ArgLoc, CallingConvention}; use idb_rs::til::pointer::PointerModifier; use idb_rs::til::r#enum::EnumMembers; use idb_rs::til::{Basic, TILTypeInfo, TypeVariant, TyperefType, TyperefValue}; @@ -310,9 +311,12 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // NOTE: `function_ty.retloc` carries the explicit return-value location, but mapping an - // IDA `ArgLoc` onto a Binary Ninja variable storage location is not yet implemented, so - // the return location is left to be derived by analysis. + // NOTE: `function_ty.retloc` (and the per-argument `arg.loc`) carry explicit storage + // locations as IDA `ArgLoc`s. Honoring them requires translating IDA's register-relative + // encodings (`Reg1`/`Reg2`/`RRel` hold raw IDA register indices) into BN register ids, + // which is processor-specific and has no general mapping available here. Until that + // per-architecture register map exists we let the calling convention derive locations, + // which is correct for the standard conventions we already map. let return_ty = self.translate_type_info(&function_ty.ret)?; let params: Vec = self.build_function_params(&function_ty.args)?; // An ellipsis calling convention is IDA's marker for a variadic function. @@ -373,8 +377,9 @@ impl TILTranslator { .clone() .map(|s| s.to_string()) .unwrap_or_else(|| format!("arg{}", idx)); + let location = value_location_from_arg_loc(arg.loc.as_ref()); self.translate_type_info(&arg.ty) - .map(|ty| FunctionParameter::new(ty, arg_name, None)) + .map(|ty| FunctionParameter::new(ty, arg_name, location)) }) .collect() } @@ -642,30 +647,13 @@ impl TILTranslator { anyhow::anyhow!("Type reference has no width: {:?}", resolved_ty) }) } - TypeVariant::Struct(s) => { - let mut total_width = 0; - for member in &s.members { - total_width += self.width_of_type(&member.member_type)?; - } - // NOTE: This is the tightly-packed lower bound on the size. It does not - // reintroduce the inter-member alignment padding (or bitfield storage sharing) - // that `build_udt_members` accounts for, because that requires per-type alignment - // which an unresolved type reference does not expose here. It is only used to give - // a referenced struct a placeholder width, so an underestimate is acceptable; the - // authoritative layout comes from `build_udt_ty` once the type is fully resolved. - Ok(total_width) - } - TypeVariant::Union(u) => { - // Size of the largest member. As with the struct case above, the union's own - // alignment padding is not applied here; this is the placeholder lower bound used - // for type-reference widths, not the final laid-out size. - let mut max_width = 0; - for member in &u.members { - let member_width = self.width_of_type(&member.member_type)?; - max_width = max_width.max(member_width); - } - Ok(max_width) - } + // Build the structure/union through the same path used for real translation and read + // back its finalized width, so alignment padding, bitfield storage sharing and tail + // padding are all accounted for instead of approximated. This recurses, but C types + // can only nest by value finitely (self-reference is only possible through a pointer, + // which is sized by `address_size`), so it always terminates. + TypeVariant::Struct(s) => Ok(self.build_udt_ty(s, false)?.finalize().width() as usize), + TypeVariant::Union(u) => Ok(self.build_udt_ty(u, true)?.finalize().width() as usize), TypeVariant::Enum(e) => Ok(e .storage_size .map(|s| s.get() as usize) @@ -692,3 +680,24 @@ impl TILTranslator { } } } + +/// Translate an IDA argument storage location into a Binary Ninja value location. +/// +/// Stack-passed arguments translate directly (the offset is architecture independent). The +/// register-based encodings (`Reg1`/`Reg2`/`RRel`) hold raw IDA register indices, and `Dist`/ +/// `Static`/custom forms have no straightforward equivalent, so those are left for analysis to +/// derive rather than guessing at a register mapping. +fn value_location_from_arg_loc(loc: Option<&ArgLoc>) -> ValueLocationSource { + match loc { + Some(ArgLoc::Stack(offset)) => ValueLocationSource::Custom(ValueLocation { + components: vec![ValueLocationComponent { + variable: Variable::from_stack_offset(*offset as i64), + offset: 0, + size: None, + }], + indirect: false, + returned_pointer: None, + }), + _ => ValueLocationSource::Default, + } +} From dfe2670f2458387fabdc7255713e7636c9d1e83d Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:03:01 -0500 Subject: [PATCH 08/25] [idb_import] Import IDA stack frames as stack variables Parse each function's stack frame (named locals, saved registers and stack arguments) from the IDB along with its geometry (frsize/frregs), carry it on FunctionInfo through the function merge, and apply it in the mapper. IDA records the frame as a structure running from the bottom of the locals upward; Binary Ninja measures stack offsets from the return address, so an IDA frame offset is shifted down by local_size + saved_regs_size. Member offsets are the running sum of preceding member widths (the frame members carry no explicit offset), and the synthetic saved-register/return-address members are skipped while still advancing the offset. Variables are created as auto stack variables typed from their translated IDB types. --- plugins/idb_import/src/mapper.rs | 65 ++++++++++++++++++++++++++++++++ plugins/idb_import/src/parse.rs | 41 ++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index dee9b9b562..da53b1d1c0 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -323,6 +323,7 @@ impl IDBMapper { is_library: false, is_no_return: false, register_vars: Vec::new(), + stack_frame: None, }; self.map_func_to_view(view, til_translator, &func_info); } else { @@ -390,6 +391,70 @@ impl IDBMapper { } self.map_register_vars_to_func(&bn_func, func); + self.map_stack_frame_to_func(&bn_func, til_translator, func); + } + + /// Apply a function's IDA stack frame as Binary Ninja stack variables. + /// + /// IDA stores the frame as a structure whose members run from the bottom of the local + /// variable area upward through the saved registers, return address and stack arguments. + /// Binary Ninja measures stack offsets from the return address (locals negative, arguments + /// positive), so an IDA frame offset is shifted down by `local_size + saved_regs_size`. The + /// frame members do not carry explicit offsets, so each member's offset is the running sum of + /// the preceding member widths; the synthetic saved-register and return-address members are + /// skipped (but still advance the offset) since they are not user variables. + fn map_stack_frame_to_func( + &self, + bn_func: &binaryninja::function::Function, + til_translator: &TILTranslator, + func: &FunctionInfo, + ) { + let Some(stack_frame) = &func.stack_frame else { + return; + }; + let base = (stack_frame.local_size + stack_frame.saved_regs_size) as i64; + let mut frame_offset: u64 = 0; + for (i, member) in stack_frame.frame.members.iter().enumerate() { + let member_width = til_translator + .width_of_type(&member.member_type) + .unwrap_or(0); + + // Skip the saved-register / return-address regions, but keep advancing the offset so + // the members after them still land at the right place. + if member.is_frame_r || member.is_frame_s { + frame_offset += member_width as u64; + continue; + } + + let name = member + .name + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_else(|| format!("var_{i}")); + let bn_offset = frame_offset as i64 - base; + match til_translator.translate_type_info(&member.member_type) { + Ok(ty) => { + tracing::debug!( + "Mapping stack variable '{}' at frame offset {} (bn {}) for {:0x}", + name, + frame_offset, + bn_offset, + func.address + ); + bn_func.create_auto_stack_var(bn_offset, &ty, &name); + } + Err(err) => { + tracing::warn!( + "Failed to translate stack variable '{}' type for {:0x}: {}", + name, + func.address, + err + ); + } + } + + frame_offset += member_width as u64; + } } /// Apply IDA register variables ("regvars") to a function. diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 157ec5f230..a225530964 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -28,6 +28,7 @@ pub struct FunctionInfo { pub is_library: bool, pub is_no_return: bool, pub register_vars: Vec, + pub stack_frame: Option, } /// A register renamed by the user within a function (IDA "regvar"), e.g. `eax` -> `count`. @@ -42,6 +43,21 @@ pub struct RegisterVarInfo { pub comment: String, } +/// A function's stack frame, as recorded by IDA. +/// +/// The `frame` UDT describes every member of the frame (locals, saved registers, return +/// address and arguments). `local_size` (IDA's `frsize`) and `saved_regs_size` (`frregs`) give +/// the geometry needed to translate IDA frame offsets into Binary Ninja's frame convention. +#[derive(Debug, Clone, Serialize)] +pub struct StackFrameInfo { + /// Size in bytes of the local variables area (IDA `frsize`). + pub local_size: u64, + /// Size in bytes of the saved registers area (IDA `frregs`). + pub saved_regs_size: u64, + /// The frame structure describing each stack member. + pub frame: idb_rs::til::udt::UDT, +} + #[derive(Debug, Clone, Serialize)] pub struct ExportInfo { pub name: String, @@ -150,6 +166,9 @@ impl IDBInfo { if !a.register_vars.is_empty() { b.register_vars = a.register_vars.clone(); } + if a.stack_frame.is_some() { + b.stack_frame = a.stack_frame.clone(); + } true }); id0_functions @@ -388,6 +407,26 @@ impl IDBFileParser { }); } + // Collect the function's stack frame (named locals, saved registers and + // stack arguments) along with the frame geometry needed to place them. + let stack_frame = match id0 + .function_defined_variables(&root_info, &func, func_ext) + { + Ok(stack_names) => stack_names.ty.map(|frame| StackFrameInfo { + local_size: func_ext.frsize.into_u64(), + saved_regs_size: func_ext.frregs as u64, + frame, + }), + Err(err) => { + tracing::warn!( + "Failed to read stack frame for {:0x}: {}", + func_start, + err + ); + None + } + }; + functions.push(FunctionInfo { name: None, ty: None, @@ -395,6 +434,7 @@ impl IDBFileParser { is_library: func.flags.is_lib(), is_no_return: func.flags.is_no_return(), register_vars, + stack_frame, }); } } @@ -502,6 +542,7 @@ impl IDBFileParser { is_library: false, is_no_return: false, register_vars: Vec::new(), + stack_frame: None, })) }; From 51a30f02deb873736c63da0495fb01b8897fcb90 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:04:29 -0500 Subject: [PATCH 09/25] [idb_import] Apply no-return functions and local labels Two pieces of IDB data were parsed but never applied to the view: - is_no_return: mark functions IDA flags as non-returning (abort/exit/etc.) with set_auto_can_return(false) so analysis does not fall through calls to them. - Local labels: IDA's in-function named locations were folded into the name list, where map_name_to_view skips anything inside code. Route them through the dedicated map_label_to_view so they land as local-label symbols. --- plugins/idb_import/src/mapper.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index da53b1d1c0..5b9c395965 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -147,6 +147,14 @@ impl IDBMapper { self.map_name_to_view(view, &til_translator, &rebased_name); } + // IDA's local labels are named locations inside functions (e.g. loop targets). They live + // in code, so `map_name_to_view` skips them; apply them as local-label symbols instead. + for label in &id0.labels { + let mut rebased_label = label.clone(); + rebased_label.address = rebase(label.address); + self.map_label_to_view(view, &rebased_label); + } + // self.map_used_types_to_view(view, &til_translator); } @@ -361,6 +369,13 @@ impl IDBMapper { return; }; + // IDA marks functions that never return (e.g. `abort`, `exit`); carry that over so BN's + // analysis does not fall through past calls to them. + if func.is_no_return { + tracing::debug!("Marking function as no-return: {:0x}", func.address); + bn_func.set_auto_can_return(false); + } + if let Some(func_ty) = &func.ty { match til_translator.translate_type_info(&func_ty) { Ok(bn_func_ty) => { From 8cffc1c2c83750d8f3a26303a9bf92ece12c5d7c Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:34:45 -0500 Subject: [PATCH 10/25] [idb_import] Import IDA function folders as Binary Ninja components IDA lets users organize functions into folders in the Functions window, stored as a dirtree. Parse that hierarchy (preserving nested folders, not just the leaf functions) into FunctionFolderEntry, and recreate it in the view as Binary Ninja components: each folder becomes a component nested under its parent, and every function leaf is added to its folder's component. Functions sitting at the dirtree root are left uncomponented, matching their "no folder" state in IDA. --- plugins/idb_import/src/mapper.rs | 61 ++++++++++++++++++++++++++++++-- plugins/idb_import/src/parse.rs | 43 ++++++++++++++++++++-- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 5b9c395965..3bf27a998e 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -1,12 +1,13 @@ //! Map the IDB data we parsed into the [`BinaryView`]. use crate::parse::{ - BaseAddressInfo, CommentInfo, ExportInfo, FunctionInfo, IDBInfo, LabelInfo, NameInfo, - SegmentInfo, + BaseAddressInfo, CommentInfo, ExportInfo, FunctionFolderEntry, FunctionInfo, IDBInfo, + LabelInfo, NameInfo, SegmentInfo, }; use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; +use binaryninja::component::{Component, ComponentBuilder}; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; @@ -155,6 +156,18 @@ impl IDBMapper { self.map_label_to_view(view, &rebased_label); } + // Recreate IDA's "Functions" window folder hierarchy as Binary Ninja components. + if let Some(dir_tree) = &self.info.dir_tree { + if !dir_tree.function_folders.is_empty() { + self.map_function_folders_to_view( + view, + &dir_tree.function_folders, + None, + &rebase, + ); + } + } + // self.map_used_types_to_view(view, &til_translator); } @@ -508,6 +521,50 @@ impl IDBMapper { } } + /// Recreate an IDA function folder tree as Binary Ninja components. + /// + /// Each IDA folder becomes a component nested under its parent (the root view component for + /// top-level folders), and each function leaf is added to the component for its folder. + /// Functions sitting directly at the dirtree root are left uncomponented, matching their + /// "no folder" status in IDA. + fn map_function_folders_to_view( + &self, + view: &BinaryView, + entries: &[FunctionFolderEntry], + parent: Option<&Component>, + rebase: &impl Fn(u64) -> u64, + ) { + for entry in entries { + match entry { + FunctionFolderEntry::Function(address) => { + let Some(parent) = parent else { + continue; + }; + let rebased = rebase(*address); + let functions_at = view.functions_at(rebased); + let func = functions_at.iter().find(|func| func.start() == rebased); + if let Some(func) = func { + parent.add_function(&func); + } else { + tracing::debug!( + "No function at {:0x} to place in folder, skipping", + rebased + ); + } + } + FunctionFolderEntry::Folder { name, entries } => { + let mut builder = ComponentBuilder::new(view.to_owned()).name(name.clone()); + if let Some(parent) = parent { + builder = builder.parent(parent.guid()); + } + let component = builder.finalize(); + tracing::debug!("Created component for folder '{}'", name); + self.map_function_folders_to_view(view, entries, Some(&component), rebase); + } + } + } + } + pub fn map_label_to_view(&self, view: &BinaryView, label: &LabelInfo) { let symbol = Symbol::builder(SymbolType::LocalLabel, &label.label, label.address).create(); tracing::debug!("Mapping label: {:0x} => {}", label.address, symbol); diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index a225530964..b05f13b6aa 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -2,7 +2,7 @@ use idb_rs::addr_info::{all_address_info, AddressInfo}; use idb_rs::id0::function::{FuncIdx, FuncordsIdx, IDBFunctionType}; -use idb_rs::id0::{ID0Section, Netdelta, SegmentType}; +use idb_rs::id0::{DirTreeEntry, ID0Section, Netdelta, SegmentType}; use idb_rs::id1::ID1Section; use idb_rs::id2::ID2Section; use idb_rs::til::section::TILSection; @@ -123,6 +123,20 @@ pub struct DirTreeInfo { /// Contains both function and data names (along with their types). pub names: Vec, pub comments: Vec, + /// The IDA "Functions" window folder hierarchy (root-level entries). + pub function_folders: Vec, +} + +/// An entry in IDA's function folder tree: either a function (by address) or a named folder +/// containing further entries. Mirrors IDA's dirtree so it can be recreated as Binary Ninja +/// components. +#[derive(Debug, Clone, Serialize)] +pub enum FunctionFolderEntry { + Function(u64), + Folder { + name: String, + entries: Vec, + }, } #[derive(Debug, Clone, Serialize, Default)] @@ -588,8 +602,9 @@ impl IDBFileParser { comments.extend(comment_info_from_addr(&addr_info)); } + let func_dir_tree = id0.dirtree_function_address()?; let mut functions = Vec::new(); - if let Some(func_dir_tree) = id0.dirtree_function_address()? { + if let Some(func_dir_tree) = &func_dir_tree { func_dir_tree.visit_leafs(|addr_raw| { let addr = Address::from_raw(*addr_raw); if let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, addr) { @@ -600,6 +615,13 @@ impl IDBFileParser { }); } + // Preserve the folder hierarchy (not just the leaf functions) so it can be recreated as + // Binary Ninja components. + let function_folders = func_dir_tree + .as_ref() + .map(|tree| build_function_folders::(&tree.entries)) + .unwrap_or_default(); + let mut names = Vec::new(); if let Some(names_dir_tree) = id0.dirtree_names()? { names_dir_tree.visit_leafs(|name_raw| { @@ -631,6 +653,23 @@ impl IDBFileParser { types, names, comments, + function_folders, }) } } + +/// Recursively convert an IDA function dirtree into our [`FunctionFolderEntry`] tree. +fn build_function_folders( + entries: &[DirTreeEntry], +) -> Vec { + entries + .iter() + .map(|entry| match entry { + DirTreeEntry::Leaf(address) => FunctionFolderEntry::Function((*address).into_u64()), + DirTreeEntry::Directory { name, entries } => FunctionFolderEntry::Folder { + name: String::from_utf8_lossy(name).to_string(), + entries: build_function_folders::(entries), + }, + }) + .collect() +} From 541cd9fc7a5a91121d531185b6c13d5145fd6ba0 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:39:56 -0500 Subject: [PATCH 11/25] [idb_import] Resolve register-based argument locations Expose the processor's register names (indexed by IDA register number) from the database and hand them to the type translator along with the architecture. Argument locations encoded as registers (Reg1, the Reg2 register pair, and register-relative RRel) are now resolved through those names into Binary Ninja registers and emitted as value locations, in addition to the stack locations already handled. Forms with no equivalent (distributed, static, custom) still fall back to the calling convention. --- plugins/idb_import/src/mapper.rs | 5 +- plugins/idb_import/src/parse.rs | 12 ++++ plugins/idb_import/src/translate.rs | 97 +++++++++++++++++++++++------ 3 files changed, 93 insertions(+), 21 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 3bf27a998e..d197e3f5df 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -109,8 +109,9 @@ impl IDBMapper { &Type::int(platform.arch().default_integer_size(), false), ); } - let til_translator = - TILTranslator::new_from_platform(&platform).with_type_container(&view.type_container()); + let til_translator = TILTranslator::new_from_platform(&platform) + .with_type_container(&view.type_container()) + .with_register_names(id0.register_names.clone()); let til_translator = match &self.info.til { Some(til) => til_translator.with_til_info(&til), None => til_translator, diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index b05f13b6aa..fb527d1503 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -114,6 +114,9 @@ pub struct ID0Info { pub comments: Vec, pub labels: Vec, pub exports: Vec, + /// Processor register names indexed by IDA register number, used to resolve the registers + /// referenced by argument/return value locations into Binary Ninja registers. + pub register_names: Vec, } #[derive(Debug, Clone, Serialize)] @@ -477,6 +480,14 @@ impl IDBFileParser { (loading_base, _) => BaseAddressInfo::BaseSegment(loading_base.into_u64()), }; + // The processor module defines the register names by index; this lets us resolve the + // registers referenced by argument/return value locations into real registers. + let register_names = id0 + .processor(&root_info) + .and_then(|processor| processor.registers_info()) + .map(|info| info.names.iter().map(|name| name.to_string()).collect()) + .unwrap_or_default(); + Ok(ID0Info { base_address, segments, @@ -484,6 +495,7 @@ impl IDBFileParser { comments, labels, exports, + register_names, }) } diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index ee57bc084c..d41380be6e 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -95,6 +95,10 @@ pub struct TILTranslator { pub cdecl_calling_convention: Option>, pub stdcall_calling_convention: Option>, pub fastcall_calling_convention: Option>, + /// Architecture used to resolve register names into register ids for value locations. + pub arch: Option, + /// Processor register names indexed by IDA register number (see [`crate::parse::ID0Info`]). + pub register_names: Vec, } impl TILTranslator { @@ -115,6 +119,8 @@ impl TILTranslator { cdecl_calling_convention: None, stdcall_calling_convention: None, fastcall_calling_convention: None, + arch: None, + register_names: Vec::new(), } } @@ -135,6 +141,8 @@ impl TILTranslator { cdecl_calling_convention: platform.get_cdecl_calling_convention(), stdcall_calling_convention: platform.get_stdcall_calling_convention(), fastcall_calling_convention: platform.get_fastcall_calling_convention(), + arch: Some(platform.arch()), + register_names: Vec::new(), } } @@ -155,9 +163,18 @@ impl TILTranslator { cdecl_calling_convention: arch.get_cdecl_calling_convention(), stdcall_calling_convention: arch.get_stdcall_calling_convention(), fastcall_calling_convention: arch.get_fastcall_calling_convention(), + arch: Some(*arch), + register_names: Vec::new(), } } + /// Provide the processor register names (indexed by IDA register number) used to resolve + /// register-based argument and return value locations. + pub fn with_register_names(mut self, register_names: Vec) -> Self { + self.register_names = register_names; + self + } + pub fn with_til_info(mut self, til: &idb_rs::til::section::TILSection) -> Self { if let Some(size_enum) = til.header.size_enum { self.enum_size = size_enum.get() as usize; @@ -377,7 +394,7 @@ impl TILTranslator { .clone() .map(|s| s.to_string()) .unwrap_or_else(|| format!("arg{}", idx)); - let location = value_location_from_arg_loc(arg.loc.as_ref()); + let location = self.value_location_from_arg_loc(arg.loc.as_ref()); self.translate_type_info(&arg.ty) .map(|ty| FunctionParameter::new(ty, arg_name, location)) }) @@ -679,25 +696,67 @@ impl TILTranslator { _ => None, } } + + /// Translate an IDA argument/return storage location into a Binary Ninja value location. + /// + /// Returns [`ValueLocationSource::Default`] for locations we cannot represent (or whose + /// registers cannot be resolved), letting the calling convention derive the location. + fn value_location_from_arg_loc(&self, loc: Option<&ArgLoc>) -> ValueLocationSource { + match self.value_location_components(loc) { + Some((components, indirect)) => ValueLocationSource::Custom(ValueLocation { + components, + indirect, + returned_pointer: None, + }), + None => ValueLocationSource::Default, + } + } + + /// Resolve an IDA [`ArgLoc`] into Binary Ninja value-location components and whether the + /// location is indirect (the value lives in memory at the component). Returns `None` for + /// forms with no representation, or when a referenced register cannot be resolved. + fn value_location_components( + &self, + loc: Option<&ArgLoc>, + ) -> Option<(Vec, bool)> { + match loc? { + ArgLoc::Stack(offset) => Some((vec![stack_component(*offset as i64)], false)), + ArgLoc::Reg1(reg) => Some((vec![self.register_component(*reg, 0)?], false)), + ArgLoc::Reg2(regs) => { + // The low and high 16 bits each hold a register index; the value is split across + // the two registers. + let low = self.register_component(regs & 0xFFFF, 0)?; + let high = self.register_component((regs >> 16) & 0xFFFF, 0)?; + Some((vec![low, high], false)) + } + ArgLoc::RRel { reg, off } => { + // Register-relative: the value lives in memory at register + offset. + Some((vec![self.register_component(u32::from(*reg), *off as i64)?], true)) + } + // Distributed, static (global address) and none/custom forms have no direct mapping. + ArgLoc::Dist(_) | ArgLoc::Static(_) | ArgLoc::None => None, + } + } + + /// Build a value-location component for an IDA register index, resolving it through the + /// processor register names and the architecture. + fn register_component(&self, reg_index: u32, offset: i64) -> Option { + let arch = self.arch.as_ref()?; + let name = self.register_names.get(reg_index as usize)?; + let register = arch.register_by_name(name)?; + Some(ValueLocationComponent { + variable: Variable::from_register(register), + offset, + size: None, + }) + } } -/// Translate an IDA argument storage location into a Binary Ninja value location. -/// -/// Stack-passed arguments translate directly (the offset is architecture independent). The -/// register-based encodings (`Reg1`/`Reg2`/`RRel`) hold raw IDA register indices, and `Dist`/ -/// `Static`/custom forms have no straightforward equivalent, so those are left for analysis to -/// derive rather than guessing at a register mapping. -fn value_location_from_arg_loc(loc: Option<&ArgLoc>) -> ValueLocationSource { - match loc { - Some(ArgLoc::Stack(offset)) => ValueLocationSource::Custom(ValueLocation { - components: vec![ValueLocationComponent { - variable: Variable::from_stack_offset(*offset as i64), - offset: 0, - size: None, - }], - indirect: false, - returned_pointer: None, - }), - _ => ValueLocationSource::Default, +/// Build a value-location component for a stack offset. +fn stack_component(offset: i64) -> ValueLocationComponent { + ValueLocationComponent { + variable: Variable::from_stack_offset(offset), + offset: 0, + size: None, } } From ee7c94f03c1246676e6e83119369324a90724cde Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 02:54:29 -0500 Subject: [PATCH 12/25] [idb_import] Make folder mapping robust and observable Function folders in Binary Ninja's symbol list are backed by the component API (the docs describe creating them "automatically via the API", linking to binaryninja.component.Component), so the component approach is correct. Improve the mapping so it does not depend on analysis having indexed the functions yet: capture the Ref returned when each function is created and key it by rebased address, then place those into folders directly (falling back to a view lookup only when needed). Add a summary log line reporting how many folders were created, how many functions were placed, and how many could not be found, and align terminology to "folder" to match the UI while the underlying type stays a component. --- plugins/idb_import/src/mapper.rs | 109 ++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 25 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index d197e3f5df..8e04eb0ee6 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -8,6 +8,7 @@ use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; use binaryninja::component::{Component, ComponentBuilder}; +use binaryninja::function::Function; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; @@ -17,7 +18,7 @@ use binaryninja::variable::Variable; use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; use sha2::{Digest, Sha256}; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; /// Maps IDB data into a [`BinaryView`]. /// @@ -119,16 +120,25 @@ impl IDBMapper { self.map_types_to_view(view, &til_translator, &self.info.merged_types()); + // Keep the created functions keyed by their (rebased) address so we can place them into + // folders later without depending on the analysis having indexed them yet. + let mut functions_by_address: HashMap> = HashMap::new(); for func in &self.info.merged_functions() { let mut rebased_func = func.clone(); rebased_func.address = rebase(func.address); - self.map_func_to_view(view, &til_translator, &rebased_func); + if let Some(bn_func) = self.map_func_to_view(view, &til_translator, &rebased_func) { + functions_by_address.insert(rebased_func.address, bn_func); + } } for export in &id0.exports { let mut rebased_export = export.clone(); rebased_export.address = rebase(export.address); - self.map_export_to_view(view, &til_translator, &rebased_export); + if let Some(bn_func) = + self.map_export_to_view(view, &til_translator, &rebased_export) + { + functions_by_address.insert(rebased_export.address, bn_func); + } } // NOTE: The undo bracketing below is not thread safe, so the mapper must be the only @@ -157,14 +167,24 @@ impl IDBMapper { self.map_label_to_view(view, &rebased_label); } - // Recreate IDA's "Functions" window folder hierarchy as Binary Ninja components. + // Recreate IDA's "Functions" window folder hierarchy. Binary Ninja's component API backs + // what the UI shows as function folders. if let Some(dir_tree) = &self.info.dir_tree { if !dir_tree.function_folders.is_empty() { + let mut stats = FolderStats::default(); self.map_function_folders_to_view( view, &dir_tree.function_folders, None, &rebase, + &functions_by_address, + &mut stats, + ); + tracing::info!( + "Mapped function folders: {} folders created, {} functions placed, {} not found", + stats.folders_created, + stats.functions_placed, + stats.functions_missing ); } } @@ -325,7 +345,7 @@ impl IDBMapper { view: &BinaryView, til_translator: &TILTranslator, export: &ExportInfo, - ) { + ) -> Option> { let within_code_section = view .sections_at(export.address) .iter() @@ -347,7 +367,7 @@ impl IDBMapper { register_vars: Vec::new(), stack_frame: None, }; - self.map_func_to_view(view, til_translator, &func_info); + return self.map_func_to_view(view, til_translator, &func_info); } else { tracing::debug!("Mapping data export: {:0x}", export.address); let name_info = NameInfo { @@ -358,6 +378,7 @@ impl IDBMapper { }; self.map_name_to_view(view, til_translator, &name_info); } + None } pub fn map_func_to_view( @@ -365,7 +386,7 @@ impl IDBMapper { view: &BinaryView, til_translator: &TILTranslator, func: &FunctionInfo, - ) { + ) -> Option> { // We need to skip things that hit the extern section, since they do not have a bearing in the // actual context of the binary, and can be derived differently between IDA and Binja. let within_extern_section = view @@ -375,12 +396,12 @@ impl IDBMapper { .is_some(); if within_extern_section { tracing::debug!("Skipping function in extern section: {:0x}", func.address); - return; + return None; } let Some(bn_func) = view.add_auto_function(func.address) else { tracing::warn!("Failed to add function for {:0x}", func.address); - return; + return None; }; // IDA marks functions that never return (e.g. `abort`, `exit`); carry that over so BN's @@ -421,6 +442,8 @@ impl IDBMapper { self.map_register_vars_to_func(&bn_func, func); self.map_stack_frame_to_func(&bn_func, til_translator, func); + + Some(bn_func) } /// Apply a function's IDA stack frame as Binary Ninja stack variables. @@ -522,18 +545,21 @@ impl IDBMapper { } } - /// Recreate an IDA function folder tree as Binary Ninja components. + /// Recreate an IDA function folder tree in the view. /// - /// Each IDA folder becomes a component nested under its parent (the root view component for - /// top-level folders), and each function leaf is added to the component for its folder. - /// Functions sitting directly at the dirtree root are left uncomponented, matching their - /// "no folder" status in IDA. + /// Binary Ninja models function folders with the component API (`Component`), which the UI + /// presents as folders. Each IDA folder becomes a component nested under its parent (the root + /// view component for top-level folders), and each function leaf is added to the folder it + /// belongs to. Functions sitting directly at the dirtree root are left in no folder, matching + /// their state in IDA. fn map_function_folders_to_view( &self, view: &BinaryView, entries: &[FunctionFolderEntry], parent: Option<&Component>, rebase: &impl Fn(u64) -> u64, + functions_by_address: &HashMap>, + stats: &mut FolderStats, ) { for entry in entries { match entry { @@ -542,15 +568,32 @@ impl IDBMapper { continue; }; let rebased = rebase(*address); - let functions_at = view.functions_at(rebased); - let func = functions_at.iter().find(|func| func.start() == rebased); - if let Some(func) = func { - parent.add_function(&func); - } else { - tracing::debug!( - "No function at {:0x} to place in folder, skipping", - rebased - ); + // Prefer the function we created during mapping; fall back to a view lookup in + // case it came from another source. + let func = functions_by_address.get(&rebased).cloned().or_else(|| { + view.functions_at(rebased) + .iter() + .find(|func| func.start() == rebased) + .map(|func| func.to_owned()) + }); + match func { + Some(func) if parent.add_function(&func) => { + stats.functions_placed += 1; + } + Some(_) => { + tracing::debug!( + "Component rejected function at {:0x}", + rebased + ); + stats.functions_missing += 1; + } + None => { + tracing::debug!( + "No function at {:0x} to place in folder, skipping", + rebased + ); + stats.functions_missing += 1; + } } } FunctionFolderEntry::Folder { name, entries } => { @@ -559,8 +602,16 @@ impl IDBMapper { builder = builder.parent(parent.guid()); } let component = builder.finalize(); - tracing::debug!("Created component for folder '{}'", name); - self.map_function_folders_to_view(view, entries, Some(&component), rebase); + stats.folders_created += 1; + tracing::debug!("Created folder '{}'", name); + self.map_function_folders_to_view( + view, + entries, + Some(&component), + rebase, + functions_by_address, + stats, + ); } } } @@ -652,6 +703,14 @@ impl IDBMapper { } } +/// Running totals for the folder mapping, used for a single summary log line. +#[derive(Default)] +struct FolderStats { + folders_created: usize, + functions_placed: usize, + functions_missing: usize, +} + /// Compute the SHA256 of the raw (on-disk) bytes backing `view`. /// /// Walks to the root of the parent-view chain, which is the raw view whose contents are the From d61c0d896dfbd9623a30b40f5aba6ad1805fb7dc Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:16:01 -0500 Subject: [PATCH 13/25] [idb_import] Resolve function return-value locations Reuse the register/stack location resolver to honor a function's explicit return location (function retloc) when the database records one, attaching it to the BN return value at full confidence. Functions without an explicit return location, or whose location cannot be resolved, keep the calling-convention-derived return as before. --- plugins/idb_import/src/translate.rs | 42 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index d41380be6e..37d795bf4e 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -3,13 +3,13 @@ use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; use binaryninja::calling_convention::CoreCallingConvention; -use binaryninja::confidence::Conf; +use binaryninja::confidence::{Conf, MAX_CONFIDENCE}; use binaryninja::platform::Platform; use binaryninja::rc::Ref; use binaryninja::types::{ EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, NamedTypeReference, - NamedTypeReferenceClass, StructureBuilder, StructureMember, StructureType, TypeBuilder, - TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, + NamedTypeReferenceClass, ReturnValue, StructureBuilder, StructureMember, StructureType, Type, + TypeBuilder, TypeContainer, ValueLocation, ValueLocationComponent, ValueLocationSource, }; use binaryninja::variable::Variable; use idb_rs::til::function::{ArgLoc, CallingConvention}; @@ -328,13 +328,10 @@ impl TILTranslator { &self, function_ty: &idb_rs::til::function::Function, ) -> anyhow::Result { - // NOTE: `function_ty.retloc` (and the per-argument `arg.loc`) carry explicit storage - // locations as IDA `ArgLoc`s. Honoring them requires translating IDA's register-relative - // encodings (`Reg1`/`Reg2`/`RRel` hold raw IDA register indices) into BN register ids, - // which is processor-specific and has no general mapping available here. Until that - // per-architecture register map exists we let the calling convention derive locations, - // which is correct for the standard conventions we already map. let return_ty = self.translate_type_info(&function_ty.ret)?; + // Recover the explicit return-value location (e.g. a non-default return register) when the + // database records one; otherwise the calling convention derives it. + let return_value = self.build_return_value(&return_ty, function_ty.retloc.as_ref()); let params: Vec = self.build_function_params(&function_ty.args)?; // An ellipsis calling convention is IDA's marker for a variadic function. let has_variable_args = matches!( @@ -349,7 +346,7 @@ impl TILTranslator { { let cc = self.cdecl_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -359,7 +356,7 @@ impl TILTranslator { Some(CallingConvention::Stdcall) if self.stdcall_calling_convention.is_some() => { let cc = self.stdcall_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -369,7 +366,7 @@ impl TILTranslator { Some(CallingConvention::Fastcall) if self.fastcall_calling_convention.is_some() => { let cc = self.fastcall_calling_convention.clone().unwrap(); TypeBuilder::function_with_opts( - &return_ty, + return_value, ¶ms, has_variable_args, cc, @@ -382,6 +379,27 @@ impl TILTranslator { Ok(builder) } + /// Build a [`ReturnValue`] for a function, attaching an explicit storage location when the + /// database records a return location that we can resolve to registers/stack. + fn build_return_value(&self, return_ty: &Ref, retloc: Option<&ArgLoc>) -> ReturnValue { + let location = self + .value_location_components(retloc) + .map(|(components, indirect)| { + Conf::new( + ValueLocation { + components, + indirect, + returned_pointer: None, + }, + MAX_CONFIDENCE, + ) + }); + ReturnValue { + ty: Conf::new(return_ty.clone(), MAX_CONFIDENCE), + location, + } + } + pub fn build_function_params( &self, args: &[idb_rs::til::function::FunctionArg], From a0ad3f0b3229c8bc81981f8a610b1fac6e6d542b Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:16:56 -0500 Subject: [PATCH 14/25] [idb_import] Dedup segments by exact range as well as name A segment that covers the exact same address range as an existing section is the same region under a possibly different name, so skip it rather than add a duplicate. We still avoid an overlap-based check, which would wrongly suppress every segment because the loader maps the whole address space. --- plugins/idb_import/src/mapper.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 8e04eb0ee6..2c7a9391d9 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -314,10 +314,11 @@ impl IDBMapper { SegmentType::Imem => Semantics::DefaultSection, }; - // NOTE: We dedup on section name rather than address range on purpose. The BN loader has - // usually already mapped the whole address space, so an IDA segment would always overlap - // an existing section and a range-based check would suppress every segment. Name-based - // dedup only skips re-adding a section we (or the loader) already named identically. + // Skip a segment we have already mapped. We check both the name and the exact address + // range: an overlap-based check would be wrong because the loader normally maps the whole + // address space (so every IDA segment overlaps something), but an IDA segment that covers + // the exact same range as an existing section — even under a different name — is the same + // region and should not be added twice. if view.section_by_name(&segment.name).is_some() { tracing::debug!( "Section with name '{}' already exists, skipping...", @@ -325,6 +326,19 @@ impl IDBMapper { ); return; } + if view + .sections() + .iter() + .any(|existing| existing.address_range() == segment.region) + { + tracing::debug!( + "Section covering {:0x}-{:0x} already exists, skipping '{}'...", + segment.region.start, + segment.region.end, + segment.name + ); + return; + } tracing::info!( "Mapping segment '{}': {:0x} - {:0x} ({:?})", From 06fb9b02d9e91eb452918d950952da083dfe72cc Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:18:47 -0500 Subject: [PATCH 15/25] [idb_import] Generalize base-address comment to all formats The lowest-segment rebasing fix is format agnostic; reword its comment so it no longer reads as Mach-O specific. The first section starting after the format headers (Mach-O load commands, PE headers, ELF program headers) is a general property, and aligning to the lowest segment matches IDA's image base regardless of format. --- plugins/idb_import/src/mapper.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 2c7a9391d9..51344a2f30 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -68,10 +68,13 @@ impl IDBMapper { BaseAddressInfo::BaseSegment(start_addr) => bn_base_address.wrapping_sub(start_addr), BaseAddressInfo::BaseSection(section_addr) => { // Align against the lowest mapped *segment*, not the lowest section. - // IDA's `min_ea` is the image base (it maps the file header too), but a - // Mach-O's first section (`__text`) starts after the header + load - // commands. Using the lowest section here over-shifts every imported - // address by the header size; segments include the header region. + // IDA's `min_ea` is the image base, and it maps the file header region too. + // Across executable formats the first *section* generally begins after the + // format's headers/metadata (e.g. the Mach-O header + load commands, the PE + // headers, the ELF header + program headers), so aligning to the lowest section + // over-shifts every imported address by the header size. Segments include the + // header region, so the lowest segment matches IDA's image base regardless of + // format. let bn_segment_addr = view .segments() .iter() From dd3e8e0c978400314f976c2d58198707717173f0 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:23:23 -0500 Subject: [PATCH 16/25] [idb_import] Translate unknown-unsized basic type as void idb-rs now parses a bare unknown type (unspecified size) instead of erroring, so handle it here: a zero-width unknown has no integer representation, so map it to void rather than constructing a zero-width int. --- plugins/idb_import/src/translate.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs index 37d795bf4e..88f1471a92 100644 --- a/plugins/idb_import/src/translate.rs +++ b/plugins/idb_import/src/translate.rs @@ -260,6 +260,8 @@ impl TILTranslator { // one is available (see `with_til_info`), falling back to the standard C ABI defaults. match basic_ty { Basic::Void => Ok(TypeBuilder::void()), + // An unknown type of unspecified size has no integer representation; treat it as void. + Basic::Unknown { bytes: 0 } => Ok(TypeBuilder::void()), Basic::Unknown { bytes } => { // In the samples provided it appears that unknown can be used to represent a byte, // so we are going to be liberal and allow unknown basic types to be treated as a sized int. From 610f28c546af9b5231f8ff81541dcee4230ccc40 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:40:15 -0500 Subject: [PATCH 17/25] [idb_import] Import typed data variables from byte flags IDA records a type for the data items it defines (byte/word/dword/qword/ oword integers, float/double/tbyte reals, and string literals), but the importer previously only created data variables for data that was named or carried an explicit TIL type. Walk the byte flags (id1), recover each defined data item's kind and size, and define a Binary Ninja data variable of the corresponding type. These are applied before the name/TIL pass so a more precise named type still wins on the same address. Structs, alignment fill and vector/custom kinds are left to the type-driven path. --- plugins/idb_import/src/mapper.rs | 42 ++++++++++++++++++++-- plugins/idb_import/src/parse.rs | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 51344a2f30..7dfe75a685 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -1,8 +1,8 @@ //! Map the IDB data we parsed into the [`BinaryView`]. use crate::parse::{ - BaseAddressInfo, CommentInfo, ExportInfo, FunctionFolderEntry, FunctionInfo, IDBInfo, - LabelInfo, NameInfo, SegmentInfo, + BaseAddressInfo, CommentInfo, DataInfo, DataKind, ExportInfo, FunctionFolderEntry, + FunctionInfo, IDBInfo, LabelInfo, NameInfo, SegmentInfo, }; use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; @@ -156,6 +156,17 @@ impl IDBMapper { } view.file().forget_undo_actions(&undo); + // Apply typed data items before names so that a more precise type from the name/TIL path + // takes precedence over the byte-flag-derived type on the same address. + if !self.info.data_items.is_empty() { + tracing::info!("Mapping {} typed data items", self.info.data_items.len()); + } + for data in &self.info.data_items { + let mut rebased_data = data.clone(); + rebased_data.address = rebase(data.address); + self.map_data_item_to_view(view, &rebased_data); + } + for name in &self.info.merged_names() { let mut rebased_name = name.clone(); rebased_name.address = rebase(name.address); @@ -562,6 +573,33 @@ impl IDBMapper { } } + /// Define a typed data variable for a data item IDA recorded in its byte flags. + pub fn map_data_item_to_view(&self, view: &BinaryView, data: &DataInfo) { + // Skip the synthetic extern section, and anything that lives inside code/functions. + let within_extern_section = view + .sections_at(data.address) + .iter() + .any(|s| s.semantics() == Semantics::External); + if within_extern_section { + return; + } + let within_code_section = view + .sections_at(data.address) + .iter() + .any(|s| s.semantics() == Semantics::ReadOnlyCode); + if within_code_section || !view.functions_containing(data.address).is_empty() { + return; + } + + let data_ty = match data.kind { + DataKind::Int(bytes) => Type::int(bytes as usize, false), + DataKind::Float(bytes) => Type::float(bytes as usize), + DataKind::String(len) => Type::array(&Type::char(), len), + }; + tracing::debug!("Mapping data item: {:0x} ({:?})", data.address, data.kind); + view.define_auto_data_var(data.address, &data_ty); + } + /// Recreate an IDA function folder tree in the view. /// /// Binary Ninja models function folders with the component API (`Component`), which the UI diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index fb527d1503..044b27493e 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -73,6 +73,26 @@ pub struct NameInfo { pub exported: bool, } +/// A typed data item IDA has defined at an address (e.g. a dword, a float, a string), recovered +/// from the byte flags so that even unnamed/untyped-in-the-TIL data still gets a Binary Ninja +/// data variable of the right kind. +#[derive(Debug, Clone, Serialize)] +pub struct DataInfo { + pub address: u64, + pub kind: DataKind, +} + +/// The kind of a [`DataInfo`], with the size IDA recorded for the item. +#[derive(Debug, Clone, Copy, Serialize)] +pub enum DataKind { + /// Integer of the given size in bytes. + Int(u8), + /// Floating point value of the given size in bytes. + Float(u8), + /// String literal of the given total length in bytes. + String(u64), +} + #[derive(Debug, Clone, Serialize)] pub struct CommentInfo { pub address: u64, @@ -149,6 +169,8 @@ pub struct IDBInfo { // NOTE: TILSection is self-contained, so we do no pre-processing. pub til: Option, pub dir_tree: Option, + /// Typed data items recovered from the byte flags (id1). + pub data_items: Vec, } impl IDBInfo { @@ -341,14 +363,54 @@ impl IDBFileParser { let id0_info = id0.as_ref().map(|id0| self.parse_id0(id0)).transpose()?; + // Recover typed data items from the byte flags so unnamed data still gets a data variable. + let data_items = id1 + .as_ref() + .map(|id1| self.parse_data_items(id1)) + .unwrap_or_default(); + Ok(IDBInfo { sha256, id0: id0_info, til, dir_tree: dir_tree_info, + data_items, }) } + /// Walk the byte flags and recover every defined data item along with the kind/size IDA gave + /// it. Items whose data type has no straightforward Binary Ninja scalar/string equivalent + /// (structs, alignment fill, vector/custom types) are left for the type-driven name path. + pub fn parse_data_items(&self, id1: &ID1Section) -> Vec { + use idb_rs::id1::{ByteDataType, ByteType}; + + let mut data_items = Vec::new(); + for (address, byte_info, size) in id1.all_bytes_no_tails() { + let ByteType::Data(data) = byte_info.byte_type() else { + continue; + }; + let kind = match data.data_type() { + ByteDataType::Byte => DataKind::Int(1), + ByteDataType::Word => DataKind::Int(2), + ByteDataType::Dword => DataKind::Int(4), + ByteDataType::Qword => DataKind::Int(8), + ByteDataType::Oword => DataKind::Int(16), + ByteDataType::Float => DataKind::Float(4), + ByteDataType::Double => DataKind::Float(8), + ByteDataType::Tbyte => DataKind::Float(10), + ByteDataType::Strlit => DataKind::String(size as u64), + // Structs carry their type through the TIL/name path; alignment, vector and + // custom kinds have no simple scalar mapping, so skip them here. + _ => continue, + }; + data_items.push(DataInfo { + address: address.into_raw().into_u64(), + kind, + }); + } + data_items + } + pub fn parse_id0(&self, id0: &ID0Section) -> anyhow::Result { let root_info_idx = id0.root_node()?; let root_info = id0.ida_info(root_info_idx)?; From a6087451ccd5df011a789f364947840a582d0b21 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:49:17 -0500 Subject: [PATCH 18/25] [idb_import] Import per-operand number formats (opt-in) IDA records how each instruction operand's number is displayed (hexadecimal, decimal, character, octal, binary, offset). Recover those from the byte flags and, behind a new "Apply IDB Operand Formats" setting (default off, since it disassembles each formatted instruction), apply them to the disassembly via Function::set_int_display_type. Each formatted instruction is disassembled to recover its immediate values, and every value is set under each formatted operand index. set_int_display_type only takes effect for the exact (value, operand) Binary Ninja renders, so combinations that do not occur are simply ignored, which keeps the mapping from a possible operand-index mismatch harmless. --- plugins/idb_import/src/lib.rs | 4 +- plugins/idb_import/src/mapper.rs | 97 +++++++++++++++++++++++++++++- plugins/idb_import/src/parse.rs | 76 +++++++++++++++++++++++ plugins/idb_import/src/settings.rs | 21 +++++++ 4 files changed, 195 insertions(+), 3 deletions(-) diff --git a/plugins/idb_import/src/lib.rs b/plugins/idb_import/src/lib.rs index baf9b8ee66..2f0b25a1b0 100644 --- a/plugins/idb_import/src/lib.rs +++ b/plugins/idb_import/src/lib.rs @@ -39,7 +39,9 @@ fn plugin_init() -> Result<(), ()> { let file_parser = IDBFileParser::new(); match file_parser.parse(&mut file_reader) { Ok(idb_info) => { - IDBMapper::new(idb_info).map_to_view(&view); + IDBMapper::new(idb_info) + .with_operand_formats(load_settings.apply_operand_formats) + .map_to_view(&view); } Err(e) => { tracing::error!("Failed to parse IDB file: {}", e); diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 7dfe75a685..4d6b2658e8 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -2,13 +2,15 @@ use crate::parse::{ BaseAddressInfo, CommentInfo, DataInfo, DataKind, ExportInfo, FunctionFolderEntry, - FunctionInfo, IDBInfo, LabelInfo, NameInfo, SegmentInfo, + FunctionInfo, IDBInfo, LabelInfo, NameInfo, OperandFormat, OperandFormatInfo, SegmentInfo, }; use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; use binaryninja::binary_view::{BinaryView, BinaryViewBase}; use binaryninja::component::{Component, ComponentBuilder}; +use binaryninja::disassembly::InstructionTextTokenKind; use binaryninja::function::Function; +use binaryninja::types::IntegerDisplayType; use binaryninja::qualified_name::QualifiedName; use binaryninja::rc::Ref; use binaryninja::section::{SectionBuilder, Semantics}; @@ -25,11 +27,21 @@ use std::collections::{HashMap, HashSet}; /// The mapper can be re-used if mapping into multiple views. pub struct IDBMapper { info: IDBInfo, + apply_operand_formats: bool, } impl IDBMapper { pub fn new(info: IDBInfo) -> Self { - Self { info } + Self { + info, + apply_operand_formats: false, + } + } + + /// Enable applying the IDB's per-operand number formats to the disassembly. + pub fn with_operand_formats(mut self, enabled: bool) -> Self { + self.apply_operand_formats = enabled; + self } pub fn map_to_view(&self, view: &BinaryView) { @@ -203,9 +215,68 @@ impl IDBMapper { } } + // Apply per-operand number formats to the disassembly, if the user opted in. This is + // gated because it disassembles each formatted instruction. + if self.apply_operand_formats && !self.info.operand_formats.is_empty() { + tracing::info!( + "Applying {} operand formats", + self.info.operand_formats.len() + ); + for operand_format in &self.info.operand_formats { + let mut rebased = operand_format.clone(); + rebased.address = rebase(operand_format.address); + self.map_operand_format_to_view(view, &rebased); + } + } + // self.map_used_types_to_view(view, &til_translator); } + /// Apply IDA's per-operand number formats to the instruction at an address. + /// + /// `set_int_display_type` only takes effect when Binary Ninja renders a token with the exact + /// (value, operand) we set, so disassembling the instruction to recover its immediate values + /// and setting each value under every formatted operand index is safe: combinations that do + /// not occur are simply never matched. + fn map_operand_format_to_view(&self, view: &BinaryView, operand_format: &OperandFormatInfo) { + let functions = view.functions_containing(operand_format.address); + let Some(func) = functions.iter().next() else { + return; + }; + let arch = func.arch(); + + // The longest instruction we may encounter; reading a few extra bytes is harmless. + let bytes = view.read_vec(operand_format.address, 16); + if bytes.is_empty() { + return; + } + let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_format.address) else { + return; + }; + + let values: Vec = tokens + .iter() + .filter_map(|token| integer_token_value(&token.kind)) + .collect(); + if values.is_empty() { + return; + } + + for (operand_index, format) in &operand_format.formats { + let display_type = integer_display_type(*format); + for value in &values { + func.set_int_display_type( + operand_format.address, + *value, + *operand_index as usize, + display_type, + Some(arch), + None, + ); + } + } + } + pub fn map_types_to_view( &self, view: &BinaryView, @@ -766,6 +837,28 @@ struct FolderStats { functions_missing: usize, } +/// Extract the integer value carried by a disassembly token, if any. +fn integer_token_value(kind: &InstructionTextTokenKind) -> Option { + match kind { + InstructionTextTokenKind::Integer { value, .. } => Some(*value), + InstructionTextTokenKind::PossibleAddress { value, .. } => Some(*value), + InstructionTextTokenKind::CodeRelativeAddress { value, .. } => Some(*value), + _ => None, + } +} + +/// Map an IDA operand number format to the Binary Ninja integer display type. +fn integer_display_type(format: OperandFormat) -> IntegerDisplayType { + match format { + OperandFormat::Hex => IntegerDisplayType::UnsignedHexadecimalDisplayType, + OperandFormat::Dec => IntegerDisplayType::SignedDecimalDisplayType, + OperandFormat::Char => IntegerDisplayType::CharacterConstantDisplayType, + OperandFormat::Oct => IntegerDisplayType::UnsignedOctalDisplayType, + OperandFormat::Bin => IntegerDisplayType::BinaryDisplayType, + OperandFormat::Offset => IntegerDisplayType::PointerDisplayType, + } +} + /// Compute the SHA256 of the raw (on-disk) bytes backing `view`. /// /// Walks to the root of the parent-view chain, which is the raw view whose contents are the diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 044b27493e..87f9163e1c 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -93,6 +93,25 @@ pub enum DataKind { String(u64), } +/// The per-operand number formats IDA recorded for an instruction. +#[derive(Debug, Clone, Serialize)] +pub struct OperandFormatInfo { + pub address: u64, + /// `(operand index, format)` pairs for the operands that have a non-default format. + pub formats: Vec<(u8, OperandFormat)>, +} + +/// How IDA displays an instruction operand's number. +#[derive(Debug, Clone, Copy, Serialize)] +pub enum OperandFormat { + Hex, + Dec, + Char, + Oct, + Bin, + Offset, +} + #[derive(Debug, Clone, Serialize)] pub struct CommentInfo { pub address: u64, @@ -171,6 +190,8 @@ pub struct IDBInfo { pub dir_tree: Option, /// Typed data items recovered from the byte flags (id1). pub data_items: Vec, + /// Per-operand number formats recovered from the byte flags (id1). + pub operand_formats: Vec, } impl IDBInfo { @@ -369,15 +390,53 @@ impl IDBFileParser { .map(|id1| self.parse_data_items(id1)) .unwrap_or_default(); + // Recover per-operand number formats (applied only when the user opts in). + let operand_formats = id1 + .as_ref() + .map(|id1| self.parse_operand_formats(id1)) + .unwrap_or_default(); + Ok(IDBInfo { sha256, id0: id0_info, til, dir_tree: dir_tree_info, data_items, + operand_formats, }) } + /// Walk the byte flags and recover the per-operand number formats IDA assigned to code. + pub fn parse_operand_formats( + &self, + id1: &ID1Section, + ) -> Vec { + let mut operand_formats = Vec::new(); + for (address, byte_info, _size) in id1.all_bytes_no_tails() { + let idb_rs::id1::ByteType::Code(code) = byte_info.byte_type() else { + continue; + }; + let mut formats = Vec::new(); + if let Ok(Some(op)) = code.operand0() { + if let Some(format) = operand_format_from_byte_op(op) { + formats.push((0, format)); + } + } + if let Ok(Some(op)) = code.operand1() { + if let Some(format) = operand_format_from_byte_op(op) { + formats.push((1, format)); + } + } + if !formats.is_empty() { + operand_formats.push(OperandFormatInfo { + address: address.into_raw().into_u64(), + formats, + }); + } + } + operand_formats + } + /// Walk the byte flags and recover every defined data item along with the kind/size IDA gave /// it. Items whose data type has no straightforward Binary Ninja scalar/string equivalent /// (structs, alignment fill, vector/custom types) are left for the type-driven name path. @@ -732,6 +791,23 @@ impl IDBFileParser { } } +/// Map an IDA operand representation to the subset of number formats we can apply directly. +/// +/// Enum, segment, stack-variable, struct-offset, forced and custom representations need extra +/// context (a resolved enum/struct/variable) and are left to other paths. +fn operand_format_from_byte_op(op: idb_rs::id1::ByteOp) -> Option { + use idb_rs::id1::ByteOp; + match op { + ByteOp::Hex => Some(OperandFormat::Hex), + ByteOp::Dec => Some(OperandFormat::Dec), + ByteOp::Char => Some(OperandFormat::Char), + ByteOp::Oct => Some(OperandFormat::Oct), + ByteOp::Bin => Some(OperandFormat::Bin), + ByteOp::Offset => Some(OperandFormat::Offset), + _ => None, + } +} + /// Recursively convert an IDA function dirtree into our [`FunctionFolderEntry`] tree. fn build_function_folders( entries: &[DirTreeEntry], diff --git a/plugins/idb_import/src/settings.rs b/plugins/idb_import/src/settings.rs index 1dca8bf24a..c8ff1eacc4 100644 --- a/plugins/idb_import/src/settings.rs +++ b/plugins/idb_import/src/settings.rs @@ -6,11 +6,13 @@ use std::path::PathBuf; #[derive(Debug, Clone)] pub struct LoadSettings { pub auto_load_file: Option, + pub apply_operand_formats: bool, } impl LoadSettings { pub const AUTO_LOAD_FILE_DEFAULT: &'static str = ""; pub const AUTO_LOAD_FILE_SETTING: &'static str = "analysis.idb.autoLoadFile"; + pub const APPLY_OPERAND_FORMATS_SETTING: &'static str = "analysis.idb.applyOperandFormats"; pub fn register() { let bn_settings = Settings::new(); @@ -23,6 +25,20 @@ impl LoadSettings { "uiSelectionAction" : "file" }); bn_settings.register_setting_json(Self::AUTO_LOAD_FILE_SETTING, &file_props.to_string()); + + let operand_formats_props = json!({ + "title" : "Apply IDB Operand Formats", + "type" : "boolean", + "default" : false, + "description" : "Apply the per-operand number formats (hexadecimal, decimal, \ + character, octal, binary, offset) recorded in the IDB to the disassembly. This \ + disassembles each formatted instruction during import and can be slow on large \ + databases." + }); + bn_settings.register_setting_json( + Self::APPLY_OPERAND_FORMATS_SETTING, + &operand_formats_props.to_string(), + ); } pub fn from_view_settings(view: &BinaryView) -> Self { @@ -37,6 +53,10 @@ impl LoadSettings { load_settings.auto_load_file = Some(path); } } + if settings.contains(Self::APPLY_OPERAND_FORMATS_SETTING) { + load_settings.apply_operand_formats = + settings.get_bool_with_opts(Self::APPLY_OPERAND_FORMATS_SETTING, &mut query_opts); + } load_settings } } @@ -45,6 +65,7 @@ impl Default for LoadSettings { fn default() -> Self { Self { auto_load_file: None, + apply_operand_formats: false, } } } From f0142e48162da95a9575dc79ad52d9e4fd3da2db Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 03:54:11 -0500 Subject: [PATCH 19/25] [idb_import] Type struct data items with their real type The data-item pass typed scalar and string data but skipped struct items, leaving typed global structures untyped. Resolve a struct item's actual type from the TIL/byte info and define the data variable with it, preferring that explicit type over the byte-flag-derived scalar kind. The per-item type lookup is limited to struct items so the common scalar/string path stays fast. --- plugins/idb_import/src/mapper.rs | 36 +++++++++++++---- plugins/idb_import/src/parse.rs | 69 ++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 29 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 4d6b2658e8..54568bd66f 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -176,7 +176,7 @@ impl IDBMapper { for data in &self.info.data_items { let mut rebased_data = data.clone(); rebased_data.address = rebase(data.address); - self.map_data_item_to_view(view, &rebased_data); + self.map_data_item_to_view(view, &til_translator, &rebased_data); } for name in &self.info.merged_names() { @@ -645,7 +645,12 @@ impl IDBMapper { } /// Define a typed data variable for a data item IDA recorded in its byte flags. - pub fn map_data_item_to_view(&self, view: &BinaryView, data: &DataInfo) { + pub fn map_data_item_to_view( + &self, + view: &BinaryView, + til_translator: &TILTranslator, + data: &DataInfo, + ) { // Skip the synthetic extern section, and anything that lives inside code/functions. let within_extern_section = view .sections_at(data.address) @@ -662,12 +667,29 @@ impl IDBMapper { return; } - let data_ty = match data.kind { - DataKind::Int(bytes) => Type::int(bytes as usize, false), - DataKind::Float(bytes) => Type::float(bytes as usize), - DataKind::String(len) => Type::array(&Type::char(), len), + // Prefer the explicit IDA type (e.g. a struct) over the byte-flag-derived scalar kind. + let data_ty = if let Some(ty) = &data.ty { + match til_translator.translate_type_info(ty) { + Ok(translated) => translated, + Err(err) => { + tracing::warn!( + "Failed to translate data type at {:0x}: {}", + data.address, + err + ); + return; + } + } + } else if let Some(kind) = data.kind { + match kind { + DataKind::Int(bytes) => Type::int(bytes as usize, false), + DataKind::Float(bytes) => Type::float(bytes as usize), + DataKind::String(len) => Type::array(&Type::char(), len), + } + } else { + return; }; - tracing::debug!("Mapping data item: {:0x} ({:?})", data.address, data.kind); + tracing::debug!("Mapping data item: {:0x}", data.address); view.define_auto_data_var(data.address, &data_ty); } diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 87f9163e1c..197878ce1e 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -73,13 +73,16 @@ pub struct NameInfo { pub exported: bool, } -/// A typed data item IDA has defined at an address (e.g. a dword, a float, a string), recovered -/// from the byte flags so that even unnamed/untyped-in-the-TIL data still gets a Binary Ninja -/// data variable of the right kind. +/// A typed data item IDA has defined at an address (e.g. a dword, a float, a string, or a +/// struct), recovered from the byte flags so that even unnamed/untyped-in-the-TIL data still gets +/// a Binary Ninja data variable of the right type. #[derive(Debug, Clone, Serialize)] pub struct DataInfo { pub address: u64, - pub kind: DataKind, + /// The explicit IDA type for this item, when one is recorded (e.g. a struct instance). + pub ty: Option, + /// The byte-flag-derived kind, used when there is no explicit type. + pub kind: Option, } /// The kind of a [`DataInfo`], with the size IDA recorded for the item. @@ -385,10 +388,10 @@ impl IDBFileParser { let id0_info = id0.as_ref().map(|id0| self.parse_id0(id0)).transpose()?; // Recover typed data items from the byte flags so unnamed data still gets a data variable. - let data_items = id1 - .as_ref() - .map(|id1| self.parse_data_items(id1)) - .unwrap_or_default(); + let data_items = match (id0.as_ref(), id1.as_ref()) { + (Some(id0), Some(id1)) => self.parse_data_items(id0, id1, id2.as_ref())?, + _ => Vec::new(), + }; // Recover per-operand number formats (applied only when the user opts in). let operand_formats = id1 @@ -440,34 +443,56 @@ impl IDBFileParser { /// Walk the byte flags and recover every defined data item along with the kind/size IDA gave /// it. Items whose data type has no straightforward Binary Ninja scalar/string equivalent /// (structs, alignment fill, vector/custom types) are left for the type-driven name path. - pub fn parse_data_items(&self, id1: &ID1Section) -> Vec { + pub fn parse_data_items( + &self, + id0: &ID0Section, + id1: &ID1Section, + id2: Option<&ID2Section>, + ) -> anyhow::Result> { use idb_rs::id1::{ByteDataType, ByteType}; + let root_info = id0.ida_info(id0.root_node()?)?; + let netdelta = root_info.netdelta(); + let mut data_items = Vec::new(); for (address, byte_info, size) in id1.all_bytes_no_tails() { let ByteType::Data(data) = byte_info.byte_type() else { continue; }; let kind = match data.data_type() { - ByteDataType::Byte => DataKind::Int(1), - ByteDataType::Word => DataKind::Int(2), - ByteDataType::Dword => DataKind::Int(4), - ByteDataType::Qword => DataKind::Int(8), - ByteDataType::Oword => DataKind::Int(16), - ByteDataType::Float => DataKind::Float(4), - ByteDataType::Double => DataKind::Float(8), - ByteDataType::Tbyte => DataKind::Float(10), - ByteDataType::Strlit => DataKind::String(size as u64), - // Structs carry their type through the TIL/name path; alignment, vector and - // custom kinds have no simple scalar mapping, so skip them here. - _ => continue, + ByteDataType::Byte => Some(DataKind::Int(1)), + ByteDataType::Word => Some(DataKind::Int(2)), + ByteDataType::Dword => Some(DataKind::Int(4)), + ByteDataType::Qword => Some(DataKind::Int(8)), + ByteDataType::Oword => Some(DataKind::Int(16)), + ByteDataType::Float => Some(DataKind::Float(4)), + ByteDataType::Double => Some(DataKind::Float(8)), + ByteDataType::Tbyte => Some(DataKind::Float(10)), + ByteDataType::Strlit => Some(DataKind::String(size as u64)), + // Structs carry their actual type in the TIL; resolve it below. Alignment fill + // and vector/custom kinds have no simple mapping and are skipped. + _ => None, + }; + + // A struct item only makes sense with its real type; look it up. Avoid the per-item + // type lookup for the (vastly more common) scalar/string items. + let ty = if matches!(data.data_type(), ByteDataType::Struct) { + AddressInfo::new(id0, id1, id2, netdelta, address) + .and_then(|info| info.tinfo(&root_info).ok().flatten()) + } else { + None }; + + if ty.is_none() && kind.is_none() { + continue; + } data_items.push(DataInfo { address: address.into_raw().into_u64(), + ty, kind, }); } - data_items + Ok(data_items) } pub fn parse_id0(&self, id0: &ID0Section) -> anyhow::Result { From a61a757938ace966b0c8e3ba4b23a5e9c1f6261b Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 14:38:47 -0500 Subject: [PATCH 20/25] [idb_import] Type string data with their character width Use the string type idb-rs now exposes to size string data variables by their real character width: a 1-byte string stays a char array, while UTF-16/UTF-32 strings become wide-character arrays (with the element count being the character count) instead of being mistyped as a byte array. --- plugins/idb_import/src/mapper.rs | 12 +++++++++++- plugins/idb_import/src/parse.rs | 30 ++++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 54568bd66f..da2cf7ad6b 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -684,7 +684,17 @@ impl IDBMapper { match kind { DataKind::Int(bytes) => Type::int(bytes as usize, false), DataKind::Float(bytes) => Type::float(bytes as usize), - DataKind::String(len) => Type::array(&Type::char(), len), + DataKind::String { len, char_width } => { + // Use a wide character for UTF-16/UTF-32 so the string renders correctly + // instead of as a byte array; the element count is the character count. + let element = if char_width <= 1 { + Type::char() + } else { + Type::wide_char(char_width as usize) + }; + let count = len / char_width.max(1) as u64; + Type::array(&element, count) + } } } else { return; diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 197878ce1e..44ddb3e6df 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -92,8 +92,9 @@ pub enum DataKind { Int(u8), /// Floating point value of the given size in bytes. Float(u8), - /// String literal of the given total length in bytes. - String(u64), + /// String literal of the given total length in bytes, with the character width in bytes + /// (1 for C/UTF-8, 2 for UTF-16, 4 for UTF-32). + String { len: u64, char_width: u8 }, } /// The per-operand number formats IDA recorded for an instruction. @@ -468,14 +469,25 @@ impl IDBFileParser { ByteDataType::Float => Some(DataKind::Float(4)), ByteDataType::Double => Some(DataKind::Float(8)), ByteDataType::Tbyte => Some(DataKind::Float(10)), - ByteDataType::Strlit => Some(DataKind::String(size as u64)), + ByteDataType::Strlit => { + // Recover the character width so wide (UTF-16/32) strings are not mistyped as + // single-byte. Defaults to one byte when no explicit string type is recorded. + let char_width = AddressInfo::new(id0, id1, id2, netdelta, address) + .and_then(|info| info.str_type()) + .map(|str_type| str_char_width(str_type.width)) + .unwrap_or(1); + Some(DataKind::String { + len: size as u64, + char_width, + }) + } // Structs carry their actual type in the TIL; resolve it below. Alignment fill // and vector/custom kinds have no simple mapping and are skipped. _ => None, }; // A struct item only makes sense with its real type; look it up. Avoid the per-item - // type lookup for the (vastly more common) scalar/string items. + // type lookup for the (vastly more common) scalar items. let ty = if matches!(data.data_type(), ByteDataType::Struct) { AddressInfo::new(id0, id1, id2, netdelta, address) .and_then(|info| info.tinfo(&root_info).ok().flatten()) @@ -816,6 +828,16 @@ impl IDBFileParser { } } +/// The byte width of an IDA string character width. +fn str_char_width(width: idb_rs::addr_info::StrWidth) -> u8 { + use idb_rs::addr_info::StrWidth; + match width { + StrWidth::Byte => 1, + StrWidth::Word => 2, + StrWidth::Dword => 4, + } +} + /// Map an IDA operand representation to the subset of number formats we can apply directly. /// /// Enum, segment, stack-variable, struct-offset, forced and custom representations need extra From 6edeedbd455658805ea7fa2dd52430ae433bfca4 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 16:15:47 -0500 Subject: [PATCH 21/25] [idb_import] Display enum operands against their enumeration Recover operands IDA displays as enumeration members, resolve each to its enumeration via idb-rs (op_enum_type, which maps the operand's member tid to the owning enumeration by tid range), and apply it to the disassembly with EnumerationDisplayType. Like the number-format pass it disassembles each operand to recover the immediate value and is gated behind the same "Apply IDB Operand Formats" setting. --- plugins/idb_import/src/mapper.rs | 59 +++++++++++++++++++++++++++++++- plugins/idb_import/src/parse.rs | 58 +++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index da2cf7ad6b..3ec664e9df 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -2,7 +2,8 @@ use crate::parse::{ BaseAddressInfo, CommentInfo, DataInfo, DataKind, ExportInfo, FunctionFolderEntry, - FunctionInfo, IDBInfo, LabelInfo, NameInfo, OperandFormat, OperandFormatInfo, SegmentInfo, + FunctionInfo, IDBInfo, LabelInfo, NameInfo, OperandEnumInfo, OperandFormat, OperandFormatInfo, + SegmentInfo, }; use crate::translate::TILTranslator; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterInfo}; @@ -229,9 +230,65 @@ impl IDBMapper { } } + // Apply enum-displayed operands (same gating, since it also disassembles each operand). + if self.apply_operand_formats && !self.info.operand_enums.is_empty() { + tracing::info!( + "Applying {} enum-displayed operands", + self.info.operand_enums.len() + ); + for operand_enum in &self.info.operand_enums { + let mut rebased = operand_enum.clone(); + rebased.address = rebase(operand_enum.address); + self.map_operand_enum_to_view(view, &rebased); + } + } + // self.map_used_types_to_view(view, &til_translator); } + /// Display an operand against its enumeration, as IDA does. + /// + /// Resolves the enumeration's Binary Ninja type id and sets it on the operand for every + /// immediate value in the instruction; like the number-format pass, an entry only takes + /// effect for the exact (value, operand) Binary Ninja renders. + fn map_operand_enum_to_view(&self, view: &BinaryView, operand_enum: &OperandEnumInfo) { + let Some(type_id) = view.type_id_by_name(operand_enum.enum_name.as_str()) else { + tracing::debug!( + "No Binary Ninja type for enum '{}', skipping operand at {:0x}", + operand_enum.enum_name, + operand_enum.address + ); + return; + }; + + let functions = view.functions_containing(operand_enum.address); + let Some(func) = functions.iter().next() else { + return; + }; + let arch = func.arch(); + + let bytes = view.read_vec(operand_enum.address, 16); + if bytes.is_empty() { + return; + } + let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_enum.address) else { + return; + }; + + for token in &tokens { + if let Some(value) = integer_token_value(&token.kind) { + func.set_int_display_type( + operand_enum.address, + value, + operand_enum.operand as usize, + IntegerDisplayType::EnumerationDisplayType, + Some(arch), + Some(type_id.as_str()), + ); + } + } + } + /// Apply IDA's per-operand number formats to the instruction at an address. /// /// `set_int_display_type` only takes effect when Binary Ninja renders a token with the exact diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 44ddb3e6df..17651df911 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -116,6 +116,15 @@ pub enum OperandFormat { Offset, } +/// An instruction operand IDA displays as a member of an enumeration. +#[derive(Debug, Clone, Serialize)] +pub struct OperandEnumInfo { + pub address: u64, + pub operand: u8, + /// The name of the enumeration the operand is displayed against. + pub enum_name: String, +} + #[derive(Debug, Clone, Serialize)] pub struct CommentInfo { pub address: u64, @@ -194,6 +203,8 @@ pub struct IDBInfo { pub dir_tree: Option, /// Typed data items recovered from the byte flags (id1). pub data_items: Vec, + /// Operands IDA displays as enumeration members. + pub operand_enums: Vec, /// Per-operand number formats recovered from the byte flags (id1). pub operand_formats: Vec, } @@ -400,16 +411,63 @@ impl IDBFileParser { .map(|id1| self.parse_operand_formats(id1)) .unwrap_or_default(); + // Recover operands displayed as enumeration members, resolved to their enumeration. + let operand_enums = match (id0.as_ref(), id1.as_ref(), til.as_ref()) { + (Some(id0), Some(id1), Some(til)) => { + self.parse_operand_enums(id0, id1, id2.as_ref(), til)? + } + _ => Vec::new(), + }; + Ok(IDBInfo { sha256, id0: id0_info, til, dir_tree: dir_tree_info, data_items, + operand_enums, operand_formats, }) } + /// Walk the byte flags and recover operands IDA displays as enumeration members, resolving + /// each to the enumeration it belongs to. + pub fn parse_operand_enums( + &self, + id0: &ID0Section, + id1: &ID1Section, + id2: Option<&ID2Section>, + til: &TILSection, + ) -> anyhow::Result> { + use idb_rs::id1::{ByteOp, ByteType}; + + let root_info = id0.ida_info(id0.root_node()?)?; + let netdelta = root_info.netdelta(); + + let mut operand_enums = Vec::new(); + for (address, byte_info, _size) in id1.all_bytes_no_tails() { + let ByteType::Code(code) = byte_info.byte_type() else { + continue; + }; + for (operand, op) in [(0u8, code.operand0()), (1u8, code.operand1())] { + if !matches!(op, Ok(Some(ByteOp::Enum))) { + continue; + } + let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, address) else { + continue; + }; + if let Some(enum_ty) = info.op_enum_type(operand, til) { + operand_enums.push(OperandEnumInfo { + address: address.into_raw().into_u64(), + operand, + enum_name: enum_ty.name.to_string(), + }); + } + } + } + Ok(operand_enums) + } + /// Walk the byte flags and recover the per-operand number formats IDA assigned to code. pub fn parse_operand_formats( &self, From d01e76831765afef3b3c5191361cc5653ec80ea1 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Fri, 5 Jun 2026 16:44:58 -0500 Subject: [PATCH 22/25] [idb_import] Probe enum altval directly instead of gating on operand flag The previous pass only considered an operand for enum display when its operand-representation flag read back as Enum. IDA records the referenced enumeration in a separate altval, independent of that nibble, so operands such as the immediate of `orr w8, w8, #imm` carry an enum reference the flag does not reflect and were skipped. Probe op_enum_type for operands 0 and 1 directly; it resolves to None when no enum is referenced, so the probe is self-gating and now recovers every enum-displayed operand. --- plugins/idb_import/src/parse.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs index 17651df911..9815feb8f2 100644 --- a/plugins/idb_import/src/parse.rs +++ b/plugins/idb_import/src/parse.rs @@ -439,23 +439,25 @@ impl IDBFileParser { id2: Option<&ID2Section>, til: &TILSection, ) -> anyhow::Result> { - use idb_rs::id1::{ByteOp, ByteType}; + use idb_rs::id1::ByteType; let root_info = id0.ida_info(id0.root_node()?)?; let netdelta = root_info.netdelta(); let mut operand_enums = Vec::new(); for (address, byte_info, _size) in id1.all_bytes_no_tails() { - let ByteType::Code(code) = byte_info.byte_type() else { + if !matches!(byte_info.byte_type(), ByteType::Code(_)) { + continue; + } + // Probe the enum altval directly for operands 0 and 1 rather than gating on the + // operand-representation flag: IDA records the referenced enum independently of that + // nibble, so instructions like `orr w8, w8, #imm` carry the enum altval even when the + // flag does not read back as `Enum`. `op_enum_type` returns `None` when there is no + // enum reference, so the probe is self-gating. + let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, address) else { continue; }; - for (operand, op) in [(0u8, code.operand0()), (1u8, code.operand1())] { - if !matches!(op, Ok(Some(ByteOp::Enum))) { - continue; - } - let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, address) else { - continue; - }; + for operand in 0u8..2 { if let Some(enum_ty) = info.op_enum_type(operand, til) { operand_enums.push(OperandEnumInfo { address: address.into_raw().into_u64(), From 53a8d796713ea1a404b27c2b416d43fd9a2ba50b Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Sat, 6 Jun 2026 02:54:45 -0500 Subject: [PATCH 23/25] Fix use-after-free of enum type id in set_int_display_type `set_int_display_type` converted the optional enumeration type id to an owned C string, then moved it into a closure to take its pointer. The C string was dropped at the end of that closure, before `BNSetIntegerConstantDisplayType` ran, so the FFI call read freed memory and stored a garbage type id for the enumeration display. As a result an integer operand set to EnumerationDisplayType never resolved to its enumeration and rendered as a raw constant. Borrow the owned C string instead so it outlives the call. --- rust/src/function.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/src/function.rs b/rust/src/function.rs index f9b6352419..71c7972da2 100644 --- a/rust/src/function.rs +++ b/rust/src/function.rs @@ -1945,7 +1945,11 @@ impl Function { ) { let arch = arch.unwrap_or_else(|| self.arch()); let enum_display_typeid = enum_display_typeid.map(IntoCStr::to_cstr); + // Borrow the owned C string rather than moving it into `map`, otherwise it is dropped + // before the FFI call below and `BNSetIntegerConstantDisplayType` reads freed memory, + // storing a garbage type id for the enumeration. let enum_display_typeid_ptr = enum_display_typeid + .as_ref() .map(|x| x.as_ptr()) .unwrap_or(std::ptr::null()); unsafe { From ced97c8e02ad28a542bfbf6386aec53ac49f830f Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Sat, 6 Jun 2026 02:54:58 -0500 Subject: [PATCH 24/25] [idb_import] Apply operand display overrides after functions exist Per-operand enum displays and number formats are applied with set_int_display_type, which needs the function containing the instruction. The IDB import runs as an early analysis activity, before functions are created, so functions_containing() returned empty and every override was silently dropped. Stash the rebased overrides in a per-view registry during the import and apply them from a BinaryViewInitialAnalysisCompletionEvent handler once functions exist, then request re-analysis so they render. Two further fixes make the overrides take effect: - Key the override by Binary Ninja's operand index, defined as the number of operand-separator tokens before the token in the rendered instruction, not IDA's operand number (e.g. the immediate of `orr w1, w8, #imm` is operand 2, while IDA records it as operand 1). Both the enum and number-format passes now count operand separators. - Apply enum displays before number formats and skip the format pass for addresses that carry an enum operand. IDA shows the enumeration even when the operand also has a number-format flag, so the format must not overwrite the enum override at the same operand. --- plugins/idb_import/src/lib.rs | 26 ++- plugins/idb_import/src/mapper.rs | 325 ++++++++++++++++++++----------- 2 files changed, 240 insertions(+), 111 deletions(-) diff --git a/plugins/idb_import/src/lib.rs b/plugins/idb_import/src/lib.rs index 2f0b25a1b0..eb18642985 100644 --- a/plugins/idb_import/src/lib.rs +++ b/plugins/idb_import/src/lib.rs @@ -1,11 +1,29 @@ use crate::mapper::IDBMapper; use crate::parse::IDBFileParser; use crate::settings::LoadSettings; -use binaryninja::binary_view::AnalysisContext; +use binaryninja::binary_view::{ + register_binary_view_event, AnalysisContext, BinaryView, BinaryViewEventHandler, + BinaryViewEventType, +}; use binaryninja::workflow::{activity, Activity, Workflow}; use std::fs::File; use std::io::BufReader; +/// Applies the per-operand display overrides (number formats and enum displays) that the import +/// deferred because they require the view's functions to exist. By the time initial analysis +/// completes the functions are present, so the overrides can be set and a re-analysis requested to +/// render them. +struct OperandDisplayApplier; + +impl BinaryViewEventHandler for OperandDisplayApplier { + fn on_event(&self, view: &BinaryView) { + if crate::mapper::apply_pending_operand_display(view) { + // The overrides only affect rendering once analysis re-runs over the functions. + view.update_analysis(); + } + } +} + mod commands; pub mod mapper; pub mod parse; @@ -24,6 +42,12 @@ fn plugin_init() -> Result<(), ()> { // Register settings globally. LoadSettings::register(); + // Apply deferred per-operand display overrides once functions exist. + register_binary_view_event( + BinaryViewEventType::BinaryViewInitialAnalysisCompletionEvent, + OperandDisplayApplier, + ); + let loader_activity = |ctx: &AnalysisContext| { let view = ctx.view(); let load_settings = LoadSettings::from_view_settings(&view); diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 3ec664e9df..1bd1b438d4 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -22,6 +22,185 @@ use idb_rs::id0::SegmentType; use idb_rs::til::TypeVariant; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; +use std::sync::{Mutex, OnceLock}; + +/// Per-operand display overrides (number formats and enum displays) that have been parsed and +/// rebased but cannot be applied yet because they require the view's functions to exist. +/// +/// `set_int_display_type` needs the [`Function`] containing an instruction, but the IDB import runs +/// as an early analysis activity, before functions are created. We stash the rebased overrides here +/// keyed by the view, then a [`crate`]-level analysis-completion handler applies them once functions +/// are available (see `lib.rs`). +pub struct PendingOperandDisplay { + pub operand_formats: Vec, + pub operand_enums: Vec, +} + +fn pending_operand_display() -> &'static Mutex> { + static REGISTRY: OnceLock>> = OnceLock::new(); + REGISTRY.get_or_init(|| Mutex::new(HashMap::new())) +} + +/// A stable per-view key shared between the import activity and the completion handler. The +/// completion event wraps the same underlying `BNBinaryView`, so its handle pointer matches. +fn view_key(view: &BinaryView) -> usize { + view.handle as usize +} + +/// Stash rebased operand-display overrides to be applied once the view's functions exist. +fn stash_operand_display(view: &BinaryView, pending: PendingOperandDisplay) { + if let Ok(mut registry) = pending_operand_display().lock() { + registry.insert(view_key(view), pending); + } +} + +/// Apply (and remove) any operand-display overrides stashed for this view. +/// +/// Enum displays (a handful) are applied first and rendered promptly; the much larger number-format +/// set is applied afterwards. Each phase requests its own re-analysis so the overrides become +/// visible. Returns whether any overrides were applied. +pub fn apply_pending_operand_display(view: &BinaryView) -> bool { + let key = view_key(view); + let Some(pending) = pending_operand_display() + .lock() + .ok() + .and_then(|mut registry| registry.remove(&key)) + else { + tracing::debug!("No pending operand display overrides for view {key:#x}"); + return false; + }; + + tracing::info!( + "Applying deferred operand display: {} enums, {} number formats", + pending.operand_enums.len(), + pending.operand_formats.len() + ); + + // Addresses whose operand is displayed as an enumeration. IDA shows the enum even when the + // operand also carries a number-format flag, so the enum must win: skip the format pass for + // these addresses, otherwise it would overwrite the enum override at the same operand. + let enum_addresses: HashSet = + pending.operand_enums.iter().map(|e| e.address).collect(); + + if !pending.operand_enums.is_empty() { + for operand_enum in &pending.operand_enums { + apply_operand_enum_display(view, operand_enum); + } + tracing::info!("Applied {} enum-displayed operands", pending.operand_enums.len()); + view.update_analysis(); + } + + if !pending.operand_formats.is_empty() { + let mut applied_formats = 0usize; + for operand_format in &pending.operand_formats { + if enum_addresses.contains(&operand_format.address) { + continue; + } + apply_operand_format_display(view, operand_format); + applied_formats += 1; + } + tracing::info!("Applied {applied_formats} operand number formats"); + view.update_analysis(); + } + + !pending.operand_enums.is_empty() || !pending.operand_formats.is_empty() +} + +/// Display an operand against its enumeration, as IDA does. +/// +/// Resolves the enumeration's Binary Ninja type id and sets it on each immediate token in the +/// instruction. The override is keyed by Binary Ninja's operand index, which `set_int_display_type` +/// defines as the number of operand-separator tokens preceding the token in the rendered line (not +/// IDA's operand number). Counting separators here makes the override match what Binary Ninja +/// renders, so the enumeration member is displayed once analysis re-renders the function. +fn apply_operand_enum_display(view: &BinaryView, operand_enum: &OperandEnumInfo) { + let Some(type_id) = view.type_id_by_name(operand_enum.enum_name.as_str()) else { + tracing::debug!( + "No Binary Ninja type for enum '{}', skipping operand at {:0x}", + operand_enum.enum_name, + operand_enum.address + ); + return; + }; + + let functions = view.functions_containing(operand_enum.address); + let Some(func) = functions.iter().next() else { + tracing::info!( + "apply_operand_enum_display: no function containing {:#x} for enum '{}'", + operand_enum.address, + operand_enum.enum_name + ); + return; + }; + let arch = func.arch(); + + let bytes = view.read_vec(operand_enum.address, 16); + if bytes.is_empty() { + return; + } + let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_enum.address) else { + return; + }; + + let mut separators = 0usize; + for token in &tokens { + if matches!(token.kind, InstructionTextTokenKind::OperandSeparator) { + separators += 1; + } + if let Some(value) = integer_token_value(&token.kind) { + func.set_int_display_type( + operand_enum.address, + value, + separators, + IntegerDisplayType::EnumerationDisplayType, + Some(arch), + Some(type_id.as_str()), + ); + } + } +} + +/// Apply IDA's per-operand number formats to the instruction at an address. +/// +/// Like the enum pass, the override is keyed by Binary Ninja's operand index (the count of +/// operand-separator tokens before the token), not IDA's operand number. Each recovered format is +/// applied to every immediate token at its rendered operand index; instructions carrying a format +/// almost always have a single immediate, so this matches what IDA formatted. +fn apply_operand_format_display(view: &BinaryView, operand_format: &OperandFormatInfo) { + let functions = view.functions_containing(operand_format.address); + let Some(func) = functions.iter().next() else { + return; + }; + let arch = func.arch(); + + // The longest instruction we may encounter; reading a few extra bytes is harmless. + let bytes = view.read_vec(operand_format.address, 16); + if bytes.is_empty() { + return; + } + let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_format.address) else { + return; + }; + + let mut separators = 0usize; + for token in &tokens { + if matches!(token.kind, InstructionTextTokenKind::OperandSeparator) { + separators += 1; + } + if let Some(value) = integer_token_value(&token.kind) { + for (_operand_index, format) in &operand_format.formats { + func.set_int_display_type( + operand_format.address, + value, + separators, + integer_display_type(*format), + Some(arch), + None, + ); + } + } + } +} /// Maps IDB data into a [`BinaryView`]. /// @@ -216,122 +395,48 @@ impl IDBMapper { } } - // Apply per-operand number formats to the disassembly, if the user opted in. This is - // gated because it disassembles each formatted instruction. - if self.apply_operand_formats && !self.info.operand_formats.is_empty() { - tracing::info!( - "Applying {} operand formats", - self.info.operand_formats.len() - ); - for operand_format in &self.info.operand_formats { - let mut rebased = operand_format.clone(); - rebased.address = rebase(operand_format.address); - self.map_operand_format_to_view(view, &rebased); - } - } - - // Apply enum-displayed operands (same gating, since it also disassembles each operand). - if self.apply_operand_formats && !self.info.operand_enums.is_empty() { + // Per-operand number formats and enum displays need the containing function to exist, but + // this import runs before functions are created. Rebase and stash them; the + // analysis-completion handler in `lib.rs` applies them once functions are available. Gated + // because applying them disassembles each affected instruction. + if self.apply_operand_formats + && (!self.info.operand_formats.is_empty() || !self.info.operand_enums.is_empty()) + { tracing::info!( - "Applying {} enum-displayed operands", + "Deferring {} operand formats and {} enum-displayed operands until analysis completes", + self.info.operand_formats.len(), self.info.operand_enums.len() ); - for operand_enum in &self.info.operand_enums { - let mut rebased = operand_enum.clone(); - rebased.address = rebase(operand_enum.address); - self.map_operand_enum_to_view(view, &rebased); - } - } - - // self.map_used_types_to_view(view, &til_translator); - } - - /// Display an operand against its enumeration, as IDA does. - /// - /// Resolves the enumeration's Binary Ninja type id and sets it on the operand for every - /// immediate value in the instruction; like the number-format pass, an entry only takes - /// effect for the exact (value, operand) Binary Ninja renders. - fn map_operand_enum_to_view(&self, view: &BinaryView, operand_enum: &OperandEnumInfo) { - let Some(type_id) = view.type_id_by_name(operand_enum.enum_name.as_str()) else { - tracing::debug!( - "No Binary Ninja type for enum '{}', skipping operand at {:0x}", - operand_enum.enum_name, - operand_enum.address + let operand_formats = self + .info + .operand_formats + .iter() + .map(|operand_format| { + let mut rebased = operand_format.clone(); + rebased.address = rebase(operand_format.address); + rebased + }) + .collect(); + let operand_enums = self + .info + .operand_enums + .iter() + .map(|operand_enum| { + let mut rebased = operand_enum.clone(); + rebased.address = rebase(operand_enum.address); + rebased + }) + .collect(); + stash_operand_display( + view, + PendingOperandDisplay { + operand_formats, + operand_enums, + }, ); - return; - }; - - let functions = view.functions_containing(operand_enum.address); - let Some(func) = functions.iter().next() else { - return; - }; - let arch = func.arch(); - - let bytes = view.read_vec(operand_enum.address, 16); - if bytes.is_empty() { - return; - } - let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_enum.address) else { - return; - }; - - for token in &tokens { - if let Some(value) = integer_token_value(&token.kind) { - func.set_int_display_type( - operand_enum.address, - value, - operand_enum.operand as usize, - IntegerDisplayType::EnumerationDisplayType, - Some(arch), - Some(type_id.as_str()), - ); - } } - } - /// Apply IDA's per-operand number formats to the instruction at an address. - /// - /// `set_int_display_type` only takes effect when Binary Ninja renders a token with the exact - /// (value, operand) we set, so disassembling the instruction to recover its immediate values - /// and setting each value under every formatted operand index is safe: combinations that do - /// not occur are simply never matched. - fn map_operand_format_to_view(&self, view: &BinaryView, operand_format: &OperandFormatInfo) { - let functions = view.functions_containing(operand_format.address); - let Some(func) = functions.iter().next() else { - return; - }; - let arch = func.arch(); - - // The longest instruction we may encounter; reading a few extra bytes is harmless. - let bytes = view.read_vec(operand_format.address, 16); - if bytes.is_empty() { - return; - } - let Some((_consumed, tokens)) = arch.instruction_text(&bytes, operand_format.address) else { - return; - }; - - let values: Vec = tokens - .iter() - .filter_map(|token| integer_token_value(&token.kind)) - .collect(); - if values.is_empty() { - return; - } - - for (operand_index, format) in &operand_format.formats { - let display_type = integer_display_type(*format); - for value in &values { - func.set_int_display_type( - operand_format.address, - *value, - *operand_index as usize, - display_type, - Some(arch), - None, - ); - } - } + // self.map_used_types_to_view(view, &til_translator); } pub fn map_types_to_view( From fc173556a5b70f369c42e80e2d9aa9bbbf0a05a7 Mon Sep 17 00:00:00 2001 From: Chris Kader Date: Sat, 6 Jun 2026 03:31:22 -0500 Subject: [PATCH 25/25] [idb_import] Split enum-operand and number-format settings Enum-displayed operands are a small, cheap set, while the per-operand number formats can number in the hundreds of thousands and dominate import time on large databases. Gate them independently: - analysis.idb.applyOperandEnums controls enum displays. - analysis.idb.applyOperandFormats controls number formats. Add analysis.idb.skipDefaultOperandFormats (default true): when applying number formats, skip operands whose format already matches Binary Ninja's default rendering (hexadecimal). These make up the bulk of formatted operands and applying them would not change the displayed text, so skipping them greatly reduces the disassembly work without affecting the result. --- plugins/idb_import/src/lib.rs | 2 + plugins/idb_import/src/mapper.rs | 91 ++++++++++++++++++++++-------- plugins/idb_import/src/settings.rs | 44 ++++++++++++++- 3 files changed, 113 insertions(+), 24 deletions(-) diff --git a/plugins/idb_import/src/lib.rs b/plugins/idb_import/src/lib.rs index eb18642985..1583d45e8a 100644 --- a/plugins/idb_import/src/lib.rs +++ b/plugins/idb_import/src/lib.rs @@ -64,7 +64,9 @@ fn plugin_init() -> Result<(), ()> { match file_parser.parse(&mut file_reader) { Ok(idb_info) => { IDBMapper::new(idb_info) + .with_operand_enums(load_settings.apply_operand_enums) .with_operand_formats(load_settings.apply_operand_formats) + .with_skip_default_operand_formats(load_settings.skip_default_operand_formats) .map_to_view(&view); } Err(e) => { diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs index 1bd1b438d4..1797ffde8c 100644 --- a/plugins/idb_import/src/mapper.rs +++ b/plugins/idb_import/src/mapper.rs @@ -207,23 +207,40 @@ fn apply_operand_format_display(view: &BinaryView, operand_format: &OperandForma /// The mapper can be re-used if mapping into multiple views. pub struct IDBMapper { info: IDBInfo, + apply_operand_enums: bool, apply_operand_formats: bool, + skip_default_operand_formats: bool, } impl IDBMapper { pub fn new(info: IDBInfo) -> Self { Self { info, + apply_operand_enums: false, apply_operand_formats: false, + skip_default_operand_formats: true, } } + /// Enable displaying operands against the enumeration IDA assigned them. + pub fn with_operand_enums(mut self, enabled: bool) -> Self { + self.apply_operand_enums = enabled; + self + } + /// Enable applying the IDB's per-operand number formats to the disassembly. pub fn with_operand_formats(mut self, enabled: bool) -> Self { self.apply_operand_formats = enabled; self } + /// Skip per-operand number formats that already match Binary Ninja's default rendering + /// (hexadecimal) when applying number formats. + pub fn with_skip_default_operand_formats(mut self, enabled: bool) -> Self { + self.skip_default_operand_formats = enabled; + self + } + pub fn map_to_view(&self, view: &BinaryView) { let Some(id0) = &self.info.id0 else { tracing::warn!("No ID0 data found, skipping mapping."); @@ -397,28 +414,10 @@ impl IDBMapper { // Per-operand number formats and enum displays need the containing function to exist, but // this import runs before functions are created. Rebase and stash them; the - // analysis-completion handler in `lib.rs` applies them once functions are available. Gated - // because applying them disassembles each affected instruction. - if self.apply_operand_formats - && (!self.info.operand_formats.is_empty() || !self.info.operand_enums.is_empty()) - { - tracing::info!( - "Deferring {} operand formats and {} enum-displayed operands until analysis completes", - self.info.operand_formats.len(), - self.info.operand_enums.len() - ); - let operand_formats = self - .info - .operand_formats - .iter() - .map(|operand_format| { - let mut rebased = operand_format.clone(); - rebased.address = rebase(operand_format.address); - rebased - }) - .collect(); - let operand_enums = self - .info + // analysis-completion handler in `lib.rs` applies them once functions are available. Each + // kind is gated by its own setting because applying them disassembles each instruction. + let operand_enums: Vec = if self.apply_operand_enums { + self.info .operand_enums .iter() .map(|operand_enum| { @@ -426,7 +425,46 @@ impl IDBMapper { rebased.address = rebase(operand_enum.address); rebased }) - .collect(); + .collect() + } else { + Vec::new() + }; + let operand_formats: Vec = if self.apply_operand_formats { + self.info + .operand_formats + .iter() + .filter_map(|operand_format| { + // When skipping defaults, drop formats that match Binary Ninja's default + // rendering (hexadecimal); they make up the bulk on large databases and + // changing them would not alter the displayed text. + let formats: Vec<_> = operand_format + .formats + .iter() + .filter(|(_, format)| { + !(self.skip_default_operand_formats + && operand_format_matches_default(*format)) + }) + .cloned() + .collect(); + if formats.is_empty() { + return None; + } + Some(OperandFormatInfo { + address: rebase(operand_format.address), + formats, + }) + }) + .collect() + } else { + Vec::new() + }; + + if !operand_enums.is_empty() || !operand_formats.is_empty() { + tracing::info!( + "Deferring {} enum-displayed operands and {} operand number formats until analysis completes", + operand_enums.len(), + operand_formats.len() + ); stash_operand_display( view, PendingOperandDisplay { @@ -1041,6 +1079,13 @@ fn integer_token_value(kind: &InstructionTextTokenKind) -> Option { } } +/// Whether a number format already matches Binary Ninja's default integer rendering +/// (hexadecimal). Applying such a format as an override would not change the displayed text, so it +/// can be skipped to avoid disassembling the bulk of formatted operands on large databases. +fn operand_format_matches_default(format: OperandFormat) -> bool { + matches!(format, OperandFormat::Hex) +} + /// Map an IDA operand number format to the Binary Ninja integer display type. fn integer_display_type(format: OperandFormat) -> IntegerDisplayType { match format { diff --git a/plugins/idb_import/src/settings.rs b/plugins/idb_import/src/settings.rs index c8ff1eacc4..e4d0e4bd6c 100644 --- a/plugins/idb_import/src/settings.rs +++ b/plugins/idb_import/src/settings.rs @@ -6,13 +6,18 @@ use std::path::PathBuf; #[derive(Debug, Clone)] pub struct LoadSettings { pub auto_load_file: Option, + pub apply_operand_enums: bool, pub apply_operand_formats: bool, + pub skip_default_operand_formats: bool, } impl LoadSettings { pub const AUTO_LOAD_FILE_DEFAULT: &'static str = ""; pub const AUTO_LOAD_FILE_SETTING: &'static str = "analysis.idb.autoLoadFile"; + pub const APPLY_OPERAND_ENUMS_SETTING: &'static str = "analysis.idb.applyOperandEnums"; pub const APPLY_OPERAND_FORMATS_SETTING: &'static str = "analysis.idb.applyOperandFormats"; + pub const SKIP_DEFAULT_OPERAND_FORMATS_SETTING: &'static str = + "analysis.idb.skipDefaultOperandFormats"; pub fn register() { let bn_settings = Settings::new(); @@ -26,8 +31,21 @@ impl LoadSettings { }); bn_settings.register_setting_json(Self::AUTO_LOAD_FILE_SETTING, &file_props.to_string()); + let operand_enums_props = json!({ + "title" : "Apply IDB Enum Operands", + "type" : "boolean", + "default" : false, + "description" : "Display operands against the enumeration IDA assigned them. This is \ + applied once analysis has created the functions and only affects the handful of \ + operands IDA displays as enumeration members, so it is inexpensive." + }); + bn_settings.register_setting_json( + Self::APPLY_OPERAND_ENUMS_SETTING, + &operand_enums_props.to_string(), + ); + let operand_formats_props = json!({ - "title" : "Apply IDB Operand Formats", + "title" : "Apply IDB Operand Number Formats", "type" : "boolean", "default" : false, "description" : "Apply the per-operand number formats (hexadecimal, decimal, \ @@ -39,6 +57,20 @@ impl LoadSettings { Self::APPLY_OPERAND_FORMATS_SETTING, &operand_formats_props.to_string(), ); + + let skip_default_formats_props = json!({ + "title" : "Skip Default IDB Operand Number Formats", + "type" : "boolean", + "default" : true, + "description" : "When applying IDB operand number formats, skip operands whose format \ + already matches Binary Ninja's default rendering (hexadecimal). On large databases \ + most formatted operands are plain hexadecimal, so skipping them greatly reduces \ + the work without changing the displayed result." + }); + bn_settings.register_setting_json( + Self::SKIP_DEFAULT_OPERAND_FORMATS_SETTING, + &skip_default_formats_props.to_string(), + ); } pub fn from_view_settings(view: &BinaryView) -> Self { @@ -53,10 +85,18 @@ impl LoadSettings { load_settings.auto_load_file = Some(path); } } + if settings.contains(Self::APPLY_OPERAND_ENUMS_SETTING) { + load_settings.apply_operand_enums = + settings.get_bool_with_opts(Self::APPLY_OPERAND_ENUMS_SETTING, &mut query_opts); + } if settings.contains(Self::APPLY_OPERAND_FORMATS_SETTING) { load_settings.apply_operand_formats = settings.get_bool_with_opts(Self::APPLY_OPERAND_FORMATS_SETTING, &mut query_opts); } + if settings.contains(Self::SKIP_DEFAULT_OPERAND_FORMATS_SETTING) { + load_settings.skip_default_operand_formats = settings + .get_bool_with_opts(Self::SKIP_DEFAULT_OPERAND_FORMATS_SETTING, &mut query_opts); + } load_settings } } @@ -65,7 +105,9 @@ impl Default for LoadSettings { fn default() -> Self { Self { auto_load_file: None, + apply_operand_enums: false, apply_operand_formats: false, + skip_default_operand_formats: true, } } }