diff --git a/bindgen-tests/tests/expectations/tests/issue-753.rs b/bindgen-tests/tests/expectations/tests/issue-753.rs index 3119ec569e..96264fd80b 100644 --- a/bindgen-tests/tests/expectations/tests/issue-753.rs +++ b/bindgen-tests/tests/expectations/tests/issue-753.rs @@ -1,4 +1,4 @@ #![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] -pub const CONST: u32 = 5; -pub const OTHER_CONST: u32 = 6; -pub const LARGE_CONST: u32 = 1536; +pub const CONST: ::std::os::raw::c_uint = 5; +pub const OTHER_CONST: ::std::os::raw::c_uint = 6; +pub const LARGE_CONST: ::std::os::raw::c_uint = 1536; diff --git a/bindgen-tests/tests/expectations/tests/issue-923.rs b/bindgen-tests/tests/expectations/tests/issue-923.rs new file mode 100644 index 0000000000..08053db038 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/issue-923.rs @@ -0,0 +1,4 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +pub const ULONG_ZERO: ::std::os::raw::c_ulong = 0; +pub const ULONGLONG_ZERO: ::std::os::raw::c_ulonglong = 0; +pub const CHAR_ZERO: ::std::os::raw::c_char = 0; diff --git a/bindgen-tests/tests/expectations/tests/libclang-9/issue-923.rs b/bindgen-tests/tests/expectations/tests/libclang-9/issue-923.rs new file mode 100644 index 0000000000..fe64295a68 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/libclang-9/issue-923.rs @@ -0,0 +1 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] \ No newline at end of file diff --git a/bindgen-tests/tests/expectations/tests/test_macro_fallback_non_system_dir.rs b/bindgen-tests/tests/expectations/tests/test_macro_fallback_non_system_dir.rs index bf9739f3fa..0b72e42d48 100644 --- a/bindgen-tests/tests/expectations/tests/test_macro_fallback_non_system_dir.rs +++ b/bindgen-tests/tests/expectations/tests/test_macro_fallback_non_system_dir.rs @@ -1,6 +1,6 @@ -pub const CONST: u32 = 5; -pub const OTHER_CONST: u32 = 6; -pub const LARGE_CONST: u32 = 
1536; -pub const THE_CONST: u32 = 28; -pub const MY_CONST: u32 = 69; +pub const CONST: ::std::os::raw::c_uint = 5; +pub const OTHER_CONST: ::std::os::raw::c_uint = 6; +pub const LARGE_CONST: ::std::os::raw::c_uint = 1536; +pub const THE_CONST: ::std::os::raw::c_uint = 28; +pub const MY_CONST: ::std::os::raw::c_uint = 69; pub const NEGATIVE: i32 = -1; diff --git a/bindgen-tests/tests/headers/issue-753.h b/bindgen-tests/tests/headers/issue-753.h index 3a6c82528a..612b72884a 100644 --- a/bindgen-tests/tests/headers/issue-753.h +++ b/bindgen-tests/tests/headers/issue-753.h @@ -7,6 +7,6 @@ #define CONST UINT32_C(5) #define OTHER_CONST UINT32_C(6) -#define LARGE_CONST UINT32_C(6 << 8) +#define LARGE_CONST (UINT32_C(6) << 8) #endif diff --git a/bindgen-tests/tests/headers/issue-923.h b/bindgen-tests/tests/headers/issue-923.h new file mode 100644 index 0000000000..10e6553e76 --- /dev/null +++ b/bindgen-tests/tests/headers/issue-923.h @@ -0,0 +1,10 @@ +// bindgen-flags: --macro-const-use-ctypes + +#ifndef ISSUE_923_H +#define ISSUE_923_H + +#define ULONG_ZERO 0UL +#define ULONGLONG_ZERO 0ULL +#define CHAR_ZERO ((char)'\0') + +#endif diff --git a/bindgen-tests/tests/tests.rs b/bindgen-tests/tests/tests.rs index 6e3c358d3e..42795548a3 100644 --- a/bindgen-tests/tests/tests.rs +++ b/bindgen-tests/tests/tests.rs @@ -269,6 +269,17 @@ fn create_bindgen_builder(header: &Path) -> Result { let source = fs::File::open(header)?; let reader = BufReader::new(source); + // Safety: assume that headers in our test suite have distinct names. 
+ let basename = header.file_name().unwrap(); + let per_test_folder = format!( + "./{}", + basename + .to_str() + .unwrap() + .replace('_', "__") + .replace('.', "_") + ); + fs::create_dir_all(&per_test_folder)?; // Scoop up bindgen-flags from test header let mut flags = Vec::with_capacity(2); @@ -334,6 +345,8 @@ fn create_bindgen_builder(header: &Path) -> Result { "#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)]", "--raw-line", "", + "--clang-macro-fallback-build-dir", + &per_test_folder, ]; let args = prepend.iter().map(ToString::to_string).chain(flags); diff --git a/bindgen/clang.rs b/bindgen/clang.rs index 9e614da9f8..325a66fd8c 100644 --- a/bindgen/clang.rs +++ b/bindgen/clang.rs @@ -2329,7 +2329,8 @@ impl EvalResult { }) } - fn kind(&self) -> CXEvalResultKind { + /// Return the kind of the evaluation result. + pub(crate) fn kind(&self) -> CXEvalResultKind { unsafe { clang_EvalResult_getKind(self.x) } } @@ -2370,6 +2371,29 @@ impl EvalResult { Some(value as i64) } + /// Try to resolve the result into a string literal. + /// This returns `None` if the result is not immediately a string literal. + pub(crate) fn as_str_literal(&self) -> Option> { + if !matches!( + self.kind(), + CXEval_StrLiteral | CXEval_CFStr | CXEval_ObjCStrLiteral, + ) { + return None; + } + // Safety: we are only copying the content, not assuming a borrow. + // TODO(@dingxiangfei2009): LLVM Libclang does not return the true size + // of a string literal, which could be truncated due to a null character + // '\0' in the middle. + let value = + unsafe { CStr::from_ptr(clang_EvalResult_getAsStr(self.x)) }; + Some(value.to_bytes().into()) + } + + /// Return the type of the value. + pub(crate) fn value_type(&self) -> Type { + self.ty + } + /// Evaluates the expression as a literal string, that may or may not be /// valid utf-8. 
pub(crate) fn as_literal_string(&self) -> Option> { diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index 346d2932f7..69bcebafd7 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -353,7 +353,7 @@ pub(crate) struct BindgenContext { /// hard errors while parsing duplicated macros, as well to allow macro /// expression parsing. /// - /// This needs to be an `std::HashMap` because the `cexpr` API requires it. + /// This needs to be an `std::HashMap` because the [`cexpr`] API requires it. parsed_macros: StdHashMap, cexpr::expr::EvalResult>, /// A map with all include locations. @@ -2055,10 +2055,12 @@ If you encounter an error missing from this list, please file an issue or a PR!" let mut header_names_to_compile = Vec::new(); let mut header_paths = Vec::new(); let mut header_includes = Vec::new(); - let single_header = self.options().input_headers.last().cloned()?; - for input_header in &self.options.input_headers - [..self.options.input_headers.len() - 1] - { + let [input_headers @ .., single_header] = + &self.options().input_headers[..] + else { + return None; + }; + for input_header in input_headers { let path = Path::new(input_header.as_ref()); if let Some(header_path) = path.parent() { if header_path == Path::new("") { @@ -2095,7 +2097,7 @@ If you encounter an error missing from this list, please file an issue or a PR!" } let mut tu = clang::TranslationUnit::parse( &index, - &single_header, + single_header, &c_args, &[], clang_sys::CXTranslationUnit_ForSerialization, @@ -2129,6 +2131,7 @@ If you encounter an error missing from this list, please file an issue or a PR!" } /// Get the currently parsed macros. 
+ /// This map only contains macros accepted by [`cexpr`] pub(crate) fn parsed_macros( &self, ) -> &StdHashMap, cexpr::expr::EvalResult> { diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index 9d72dcf06e..e2932c728a 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -177,8 +177,6 @@ impl ClangSubItemParser for Var { cursor: clang::Cursor, ctx: &mut BindgenContext, ) -> Result, ParseError> { - use cexpr::expr::EvalResult; - use cexpr::literal::CChar; use clang_sys::*; match cursor.kind() { CXCursor_MacroDefinition => { @@ -210,7 +208,9 @@ impl ClangSubItemParser for Var { // NB: It's important to "note" the macro even if the result is // not an integer, otherwise we might loose other kind of // derived macros. - ctx.note_parsed_macro(id.clone(), value.clone()); + if let ClangOrCexprMacroEval::Cexpr(value) = &value { + ctx.note_parsed_macro(id.clone(), value.clone()); + } if previously_defined { let name = String::from_utf8(id).unwrap(); @@ -223,23 +223,55 @@ impl ClangSubItemParser for Var { // enforce utf8 there, so we should have already panicked at // this point. 
+ let name = String::from_utf8(id).unwrap(); + let value = match value { + ClangOrCexprMacroEval::Cexpr(eval_result) => { + eval_result.into() + } + ClangOrCexprMacroEval::Clang(eval_result) => eval_result, + }; let (type_kind, val) = match value { - EvalResult::Invalid => return Err(ParseError::Continue), - EvalResult::Float(f) => { - (TypeKind::Float(FloatKind::Double), VarType::Float(f)) + MacroEvalResult::Invalid => { + return Err(ParseError::Continue) } - EvalResult::Char(c) => { - let c = match c { - CChar::Char(c) => { - assert_eq!(c.len_utf8(), 1); - c as u8 - } - CChar::Raw(c) => u8::try_from(c).unwrap(), - }; - - (TypeKind::Int(IntKind::U8), VarType::Char(c)) + MacroEvalResult::Float(float_lit, float_type) => { + (TypeKind::Float(float_type), VarType::Float(float_lit)) + } + MacroEvalResult::Char(MacroCharValue::SChar(char)) => ( + TypeKind::Int(IntKind::Char { is_signed: true }), + VarType::Int(char.into()), + ), + MacroEvalResult::Char(MacroCharValue::UChar(char)) => ( + TypeKind::Int(IntKind::Char { is_signed: false }), + VarType::Int(char.into()), + ), + MacroEvalResult::Char(MacroCharValue::WChar(char)) => { + (TypeKind::Int(IntKind::WChar), VarType::Int(char)) } - EvalResult::Str(val) => { + MacroEvalResult::Char(MacroCharValue::Char16(char)) => ( + TypeKind::Int(IntKind::Char16), + VarType::Int(char.into()), + ), + MacroEvalResult::Char(MacroCharValue::Char32(char)) => { + (TypeKind::Int(IntKind::U32), VarType::Int(char.into())) + } + MacroEvalResult::Char(MacroCharValue::Raw(char)) => { + let char = u8::try_from(char) + .map_err(|_| ParseError::Continue)?; + (TypeKind::Int(IntKind::U8), VarType::Char(char)) + } + MacroEvalResult::Char(MacroCharValue::RustChar(char)) => { + if char.len_utf8() != 1 { + return Err(ParseError::Continue); + } + ( + TypeKind::Int(IntKind::U8), + VarType::Char( + char.try_into() + .map_err(|_| ParseError::Continue)?, + ), + ) + } + MacroEvalResult::Str(val) => { let char_ty = Item::builtin_type( TypeKind::Int(IntKind::U8), 
true, @@ -250,13 +282,27 @@ impl ClangSubItemParser for Var { } (TypeKind::Pointer(char_ty), VarType::String(val)) } - EvalResult::Int(Wrapping(value)) => { - let kind = ctx + MacroEvalResult::Int(value, int_type) => { + let kind = if let Some(kind) = ctx .options() .last_callback(|c| c.int_macro(&name, value)) - .unwrap_or_else(|| { - default_macro_constant_type(ctx, value) - }); + { + kind + } else { + match int_type { + MacroIntType::Fit => { + default_macro_constant_type(ctx, value) + } + MacroIntType::Short => IntKind::Short, + MacroIntType::UShort => IntKind::UShort, + MacroIntType::Int => IntKind::Int, + MacroIntType::UInt => IntKind::UInt, + MacroIntType::Long => IntKind::Long, + MacroIntType::LongLong => IntKind::LongLong, + MacroIntType::ULong => IntKind::ULong, + MacroIntType::ULongLong => IntKind::ULongLong, + } + }; (TypeKind::Int(kind), VarType::Int(value)) } @@ -378,6 +424,62 @@ impl ClangSubItemParser for Var { } } +/// Integer type for a macro expansion. +#[derive(Clone, Copy, Debug)] +pub(crate) enum MacroIntType { + /// This instructs the type lowering to use the smallest integer type that + /// holds the value. + Fit, + /// This type should be lowered into `c_short` or equivalent. + Short, + /// This type should be lowered into `c_ushort` or equivalent. + UShort, + /// This type should be lowered into `c_int` or equivalent. + Int, + /// This type should be lowered into `c_uint` or equivalent. + UInt, + /// This type should be lowered into `c_long` or equivalent. + Long, + /// This type should be lowered into `c_ulong` or equivalent. + ULong, + /// This type should be lowered into `c_longlong` or equivalent. + LongLong, + /// This type should be lowered into `c_ulonglong` or equivalent. 
+ ULongLong, +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum MacroCharValue { + /// Arbitrary value reported as a character + Raw(u64), + /// At least 16bit unsigned integer + Char16(u16), + /// At least 32bit unsigned integer + Char32(u32), + /// At least 8bit unsigned integer + UChar(u8), + /// At least 8bit signed integer + SChar(i8), + /// At least 16bit, maybe signed or unsigned, integer + WChar(i64), + /// Known Rust-style character + RustChar(char), +} + +#[derive(Clone, Debug)] +pub(crate) enum MacroEvalResult { + /// An integer literal expression + Int(i64, MacroIntType), + /// A string literal expression + Str(Vec), + /// A floating point literal expression + Float(f64, FloatKind), + /// A character value literal expression + Char(MacroCharValue), + /// No value: macro evaluation failed + Invalid, +} + /// This function uses a [`FallbackTranslationUnit`][clang::FallbackTranslationUnit] to parse each /// macro that cannot be parsed by the normal bindgen process for `#define`s. 
/// @@ -390,7 +492,9 @@ impl ClangSubItemParser for Var { fn parse_macro_clang_fallback( ctx: &mut BindgenContext, cursor: &clang::Cursor, -) -> Option<(Vec, cexpr::expr::EvalResult)> { +) -> Option<(Vec, MacroEvalResult)> { + use clang_sys::*; + if !ctx.options().clang_macro_fallback { return None; } @@ -410,21 +514,128 @@ fn parse_macro_clang_fallback( let macro_stmt = all_stmts.first()?; // Children should all be expressions from the compound statement let paren_exprs = macro_stmt.collect_children(); - // First child in all_exprs is the expression utilizing the given macro to be evaluated - // Should be ParenExpr + // First child in all_exprs is the expression utilizing the given macro to + // be evaluated, which should be ParenExpr let paren = paren_exprs.first()?; - + let result = paren.evaluate()?; + let value_type = result.value_type().canonical_type(); + if !value_type.is_valid() { + return None; + } + let name = cursor.spelling().into_bytes(); Some(( - cursor.spelling().into_bytes(), - cexpr::expr::EvalResult::Int(Wrapping(paren.evaluate()?.as_int()?)), + name, + if let Some(int_lit) = result.as_int() { + match value_type.kind() { + // Integers + CXType_Int => MacroEvalResult::Int(int_lit, MacroIntType::Int), + CXType_UInt => { + MacroEvalResult::Int(int_lit, MacroIntType::UInt) + } + CXType_Short => { + MacroEvalResult::Int(int_lit, MacroIntType::Short) + } + CXType_UShort => { + MacroEvalResult::Int(int_lit, MacroIntType::UShort) + } + CXType_Long => { + MacroEvalResult::Int(int_lit, MacroIntType::Long) + } + CXType_ULong => { + MacroEvalResult::Int(int_lit, MacroIntType::ULong) + } + CXType_LongLong => { + MacroEvalResult::Int(int_lit, MacroIntType::LongLong) + } + CXType_ULongLong => { + MacroEvalResult::Int(int_lit, MacroIntType::ULongLong) + } + // Chars + CXType_Char16 => MacroEvalResult::Char(MacroCharValue::Char16( + int_lit.try_into().ok()?, + )), + CXType_Char32 => MacroEvalResult::Char(MacroCharValue::Char32( + int_lit.try_into().ok()?, + )), + 
CXType_Char_S | CXType_SChar => MacroEvalResult::Char( + MacroCharValue::SChar(int_lit.try_into().ok()?), + ), + CXType_Char_U | CXType_UChar => MacroEvalResult::Char( + MacroCharValue::UChar(int_lit.try_into().ok()?), + ), + CXType_WChar => { + MacroEvalResult::Char(MacroCharValue::WChar(int_lit)) + } + _ => { + warn!( + "Clang recognised an integeral literal for {} but + Bindgen does not support it yet.", + cursor.spelling(), + ); + return None; + } + } + } else if let Some(str_lit) = result.as_str_literal() { + MacroEvalResult::Str(str_lit) + } else if let Some(float_lit) = result.as_double() { + MacroEvalResult::Float( + float_lit, + match value_type.kind() { + CXType_Float => FloatKind::Float, + CXType_BFloat16 | CXType_Float16 => FloatKind::Float16, + CXType_Double => FloatKind::Double, + CXType_LongDouble => FloatKind::LongDouble, + CXType_Float128 => FloatKind::Float128, + _ => { + warn!( + "Clang recognised a floating point literal for {} but + Bindgen does not support it yet.", + cursor.spelling(), + ); + return None; + } + }, + ) + } else { + return None; + }, )) } +impl From for MacroEvalResult { + fn from(value: cexpr::expr::EvalResult) -> Self { + match value { + cexpr::expr::EvalResult::Int(wrapping) => { + MacroEvalResult::Int(wrapping.0, MacroIntType::Fit) + } + cexpr::expr::EvalResult::Float(float_lit) => { + MacroEvalResult::Float(float_lit, FloatKind::Double) + } + cexpr::expr::EvalResult::Char(cexpr::literal::CChar::Char( + char, + )) => MacroEvalResult::Char(MacroCharValue::RustChar(char)), + cexpr::expr::EvalResult::Char(cexpr::literal::CChar::Raw(char)) => { + MacroEvalResult::Char(MacroCharValue::Raw(char)) + } + cexpr::expr::EvalResult::Str(str_lit) => { + MacroEvalResult::Str(str_lit) + } + cexpr::expr::EvalResult::Invalid => MacroEvalResult::Invalid, + } + } +} + +#[derive(Debug)] +enum ClangOrCexprMacroEval { + Cexpr(cexpr::expr::EvalResult), + Clang(MacroEvalResult), +} + /// Try and parse a macro using all the macros parsed until now. 
fn parse_macro( ctx: &mut BindgenContext, cursor: &clang::Cursor, -) -> Option<(Vec, cexpr::expr::EvalResult)> { +) -> Option<(Vec, ClangOrCexprMacroEval)> { use cexpr::expr; let mut cexpr_tokens = cursor.cexpr_tokens(); @@ -433,11 +644,22 @@ fn parse_macro( callbacks.modify_macro(&cursor.spelling(), &mut cexpr_tokens); } + let clang_fallback = |ctx| { + parse_macro_clang_fallback(ctx, cursor) + .map(|(name, result)| (name, ClangOrCexprMacroEval::Clang(result))) + }; + + if ctx.options().macro_const_use_ctypes { + return clang_fallback(ctx); + } + let parser = expr::IdentifierParser::new(ctx.parsed_macros()); match parser.macro_definition(&cexpr_tokens) { - Ok((_, (id, val))) => Some((id.into(), val)), - _ => parse_macro_clang_fallback(ctx, cursor), + Ok((_, (id, val))) => { + Some((id.into(), ClangOrCexprMacroEval::Cexpr(val))) + } + _ => clang_fallback(ctx), } } diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index 5304862584..c7fe6ccbfc 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -505,6 +505,10 @@ struct BindgenCommand { /// Use DSTs to represent structures with flexible array members. #[arg(long)] flexarray_dst: bool, + /// Resolve the true type of macro definition as constant and use + /// corresponding C types. + #[arg(long)] + macro_const_use_ctypes: bool, /// Derive custom traits on any kind of type. The CUSTOM value must be of the shape REGEX=DERIVE where DERIVE is a comma-separated list of derive macros. 
#[arg(long, value_name = "CUSTOM", value_parser = parse_custom_derive)] with_derive_custom: Vec<(Vec, String)>, @@ -697,6 +701,7 @@ where clang_macro_fallback, clang_macro_fallback_build_dir, flexarray_dst, + macro_const_use_ctypes, with_derive_custom, with_derive_custom_struct, with_derive_custom_enum, @@ -1003,6 +1008,7 @@ where clang_macro_fallback => |b, _| b.clang_macro_fallback(), clang_macro_fallback_build_dir, flexarray_dst, + macro_const_use_ctypes => |b, _| b.macro_const_use_ctypes(), wrap_static_fns, wrap_static_fns_path, wrap_static_fns_suffix, diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index baa541c5ac..219a527fff 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -1109,6 +1109,21 @@ options! { }, as_args: "--ctypes-prefix", }, + /// Use C platform-specific types for generated macro constants. + macro_const_use_ctypes: bool { + methods: { + /// Use C platform-specific types for generated macro constants. + /// + /// This implies `clang_macro_fallback` and takes precedence over + /// `default_macro_constant_type`. + pub fn macro_const_use_ctypes(mut self) -> Builder { + self.options.macro_const_use_ctypes = true; + self.options.clang_macro_fallback = true; + self + } + }, + as_args: "--macro-const-use-ctypes", + } /// The prefix for anonymous fields. anon_fields_prefix: String { default: DEFAULT_ANON_FIELDS_PREFIX.into(),