Skip to content

Commit 875a10f

Browse files
committed
Introduce dedicated type for processing instruction
1 parent 0f9444a commit 875a10f

File tree

10 files changed

+186
-31
lines changed

10 files changed

+186
-31
lines changed

Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
### Misc Changes
1818

19+
- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.
20+
21+
[#650]: https://github.com/tafia/quick-xml/issues/650
22+
1923

2024
## 0.32.0 -- 2024-06-10
2125

fuzz/fuzz_targets/fuzz_target_1.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ where
4141
}
4242
Ok(Event::Text(ref e))
4343
| Ok(Event::Comment(ref e))
44-
| Ok(Event::PI(ref e))
4544
| Ok(Event::DocType(ref e)) => {
4645
debug_format!(e);
4746
if let Err(err) = e.unescape() {
@@ -56,6 +55,9 @@ where
5655
break;
5756
}
5857
}
58+
Ok(Event::PI(ref e)) => {
59+
debug_format!(e);
60+
}
5961
Ok(Event::Decl(ref e)) => {
6062
debug_format!(e);
6163
let _ = black_box(e.version());

fuzz/fuzz_targets/structured_roundtrip.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use arbitrary::{Arbitrary, Unstructured};
44
use libfuzzer_sys::fuzz_target;
5-
use quick_xml::events::{BytesCData, BytesText, Event};
5+
use quick_xml::events::{BytesCData, BytesPI, BytesText, Event};
66
use quick_xml::reader::{Config, NsReader, Reader};
77
use quick_xml::writer::Writer;
88
use std::{hint::black_box, io::Cursor};
@@ -71,7 +71,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
7171
_ = element_writer.write_cdata_content(BytesCData::new(*text))?;
7272
}
7373
WritePiContent(text) => {
74-
_ = element_writer.write_pi_content(BytesText::from_escaped(*text))?;
74+
_ = element_writer.write_pi_content(BytesPI::new(*text))?;
7575
}
7676
WriteEmpty => {
7777
_ = element_writer.write_empty()?;

src/events/mod.rs

Lines changed: 153 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ use crate::escape::{
5050
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
5151
};
5252
use crate::name::{LocalName, QName};
53-
use crate::reader::is_whitespace;
53+
use crate::reader::{is_whitespace, name_len};
5454
use crate::utils::write_cow_string;
5555
#[cfg(feature = "serialize")]
5656
use crate::utils::CowRef;
@@ -992,6 +992,157 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
992992

993993
////////////////////////////////////////////////////////////////////////////////////////////////////
994994

995+
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
996+
///
997+
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
998+
#[derive(Clone, Eq, PartialEq)]
999+
pub struct BytesPI<'a> {
1000+
content: BytesStart<'a>,
1001+
}
1002+
1003+
impl<'a> BytesPI<'a> {
1004+
/// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1005+
#[inline]
1006+
pub(crate) fn wrap(content: &'a [u8], target_len: usize) -> Self {
1007+
Self {
1008+
content: BytesStart::wrap(content, target_len),
1009+
}
1010+
}
1011+
1012+
/// Creates a new `BytesPI` from a string.
1013+
///
1014+
/// # Warning
1015+
///
1016+
/// `content` must not contain the `?>` sequence.
1017+
#[inline]
1018+
pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1019+
let buf = str_cow_to_bytes(content);
1020+
let name_len = name_len(&buf);
1021+
Self {
1022+
content: BytesStart { buf, name_len },
1023+
}
1024+
}
1025+
1026+
/// Ensures that all data is owned to extend the object's lifetime if
1027+
/// necessary.
1028+
#[inline]
1029+
pub fn into_owned(self) -> BytesPI<'static> {
1030+
BytesPI {
1031+
content: self.content.into_owned().into(),
1032+
}
1033+
}
1034+
1035+
/// Extracts the inner `Cow` from the `BytesPI` event container.
1036+
#[inline]
1037+
pub fn into_inner(self) -> Cow<'a, [u8]> {
1038+
self.content.buf
1039+
}
1040+
1041+
/// Converts the event into a borrowed event.
1042+
#[inline]
1043+
pub fn borrow(&self) -> BytesPI {
1044+
BytesPI {
1045+
content: self.content.borrow(),
1046+
}
1047+
}
1048+
1049+
/// A target used to identify the application to which the instruction is directed.
1050+
///
1051+
/// # Example
1052+
///
1053+
/// ```
1054+
/// # use pretty_assertions::assert_eq;
1055+
/// use quick_xml::events::BytesPI;
1056+
///
1057+
/// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1058+
/// assert_eq!(instruction.target(), b"xml-stylesheet");
1059+
/// ```
1060+
#[inline]
1061+
pub fn target(&self) -> &[u8] {
1062+
self.content.name().0
1063+
}
1064+
1065+
/// Content of the processing instruction. Contains everything between target
1066+
/// name and the end of the instruction. A direct consequence is that the first
1067+
/// character is always a space character.
1068+
///
1069+
/// # Example
1070+
///
1071+
/// ```
1072+
/// # use pretty_assertions::assert_eq;
1073+
/// use quick_xml::events::BytesPI;
1074+
///
1075+
/// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1076+
/// assert_eq!(instruction.content(), br#" href="style.css""#);
1077+
/// ```
1078+
#[inline]
1079+
pub fn content(&self) -> &[u8] {
1080+
self.content.attributes_raw()
1081+
}
1082+
1083+
/// A view of the processing instructions' content as a list of key-value pairs.
1084+
///
1085+
/// Key-value pairs are used in some processing instructions, for example in
1086+
/// `<?xml-stylesheet?>`.
1087+
///
1088+
/// Returned iterator does not validate attribute values as may required by
1089+
/// target's rules. For example, it doesn't check that substring `?>` is not
1090+
/// present in the attribute value. That shouldn't be the problem when event
1091+
/// is produced by the reader, because reader detects end of processing instruction
1092+
/// by the first `?>` sequence, as required by the specification, and therefore
1093+
/// this sequence cannot appear inside it.
1094+
///
1095+
/// # Example
1096+
///
1097+
/// ```
1098+
/// # use pretty_assertions::assert_eq;
1099+
/// use std::borrow::Cow;
1100+
/// use quick_xml::events::attributes::Attribute;
1101+
/// use quick_xml::events::BytesPI;
1102+
/// use quick_xml::name::QName;
1103+
///
1104+
/// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1105+
/// for attr in instruction.attributes() {
1106+
/// assert_eq!(attr, Ok(Attribute {
1107+
/// key: QName(b"href"),
1108+
/// value: Cow::Borrowed(b"style.css"),
1109+
/// }));
1110+
/// }
1111+
/// ```
1112+
#[inline]
1113+
pub fn attributes(&self) -> Attributes {
1114+
self.content.attributes()
1115+
}
1116+
}
1117+
1118+
impl<'a> Debug for BytesPI<'a> {
1119+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1120+
write!(f, "BytesPI {{ content: ")?;
1121+
write_cow_string(f, &self.content.buf)?;
1122+
write!(f, " }}")
1123+
}
1124+
}
1125+
1126+
impl<'a> Deref for BytesPI<'a> {
1127+
type Target = [u8];
1128+
1129+
fn deref(&self) -> &[u8] {
1130+
&self.content
1131+
}
1132+
}
1133+
1134+
#[cfg(feature = "arbitrary")]
1135+
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
1136+
fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
1137+
Ok(Self::new(<&str>::arbitrary(u)?))
1138+
}
1139+
fn size_hint(depth: usize) -> (usize, Option<usize>) {
1140+
return <&str as arbitrary::Arbitrary>::size_hint(depth);
1141+
}
1142+
}
1143+
1144+
////////////////////////////////////////////////////////////////////////////////////////////////////
1145+
9951146
/// Event emitted by [`Reader::read_event_into`].
9961147
///
9971148
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
@@ -1013,7 +1164,7 @@ pub enum Event<'a> {
10131164
/// XML declaration `<?xml ...?>`.
10141165
Decl(BytesDecl<'a>),
10151166
/// Processing instruction `<?...?>`.
1016-
PI(BytesText<'a>),
1167+
PI(BytesPI<'a>),
10171168
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
10181169
DocType(BytesText<'a>),
10191170
/// End of XML document.

src/reader/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,7 +1697,7 @@ mod test {
16971697

16981698
/// Ensures, that no empty `Text` events are generated
16991699
mod $read_event {
1700-
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
1700+
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
17011701
use crate::reader::Reader;
17021702
use pretty_assertions::assert_eq;
17031703

@@ -1767,7 +1767,7 @@ mod test {
17671767

17681768
assert_eq!(
17691769
reader.$read_event($buf) $(.$await)? .unwrap(),
1770-
Event::PI(BytesText::from_escaped("xml-stylesheet '? >\" "))
1770+
Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
17711771
);
17721772
}
17731773

@@ -1848,7 +1848,7 @@ mod test {
18481848
$(, $async:ident, $await:ident)?
18491849
) => {
18501850
mod small_buffers {
1851-
use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
1851+
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
18521852
use crate::reader::Reader;
18531853
use pretty_assertions::assert_eq;
18541854

@@ -1882,7 +1882,7 @@ mod test {
18821882

18831883
assert_eq!(
18841884
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
1885-
Event::PI(BytesText::new("pi"))
1885+
Event::PI(BytesPI::new("pi"))
18861886
);
18871887
assert_eq!(
18881888
reader.$read_event(&mut buf) $(.$await)? .unwrap(),

src/reader/state.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use encoding_rs::UTF_8;
33

44
use crate::encoding::Decoder;
55
use crate::errors::{Error, IllFormedError, Result, SyntaxError};
6-
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
6+
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
77
#[cfg(feature = "encoding")]
88
use crate::reader::EncodingRef;
99
use crate::reader::{is_whitespace, name_len, BangType, Config, ParseState};
@@ -242,7 +242,7 @@ impl ReaderState {
242242

243243
Ok(Event::Decl(event))
244244
} else {
245-
Ok(Event::PI(BytesText::wrap(content, self.decoder())))
245+
Ok(Event::PI(BytesPI::wrap(content, name_len(content))))
246246
}
247247
} else {
248248
// <?....EOF

src/writer.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::result::Result as StdResult;
66

77
use crate::encoding::UTF8_BOM;
88
use crate::errors::{Error, Result};
9-
use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
9+
use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};
1010

1111
#[cfg(feature = "async-tokio")]
1212
mod async_tokio;
@@ -551,10 +551,10 @@ impl<'a, W: Write> ElementWriter<'a, W> {
551551
}
552552

553553
/// Write a processing instruction `<?...?>` inside the current element.
554-
pub fn write_pi_content(self, text: BytesText) -> Result<&'a mut Writer<W>> {
554+
pub fn write_pi_content(self, pi: BytesPI) -> Result<&'a mut Writer<W>> {
555555
self.writer
556556
.write_event(Event::Start(self.start_tag.borrow()))?;
557-
self.writer.write_event(Event::PI(text))?;
557+
self.writer.write_event(Event::PI(pi))?;
558558
self.writer
559559
.write_event(Event::End(self.start_tag.to_end()))?;
560560
Ok(self.writer)

src/writer/async_tokio.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::result::Result as StdResult;
44
use tokio::io::{AsyncWrite, AsyncWriteExt};
55

66
use crate::errors::{Error, Result};
7-
use crate::events::{BytesCData, BytesText, Event};
7+
use crate::events::{BytesCData, BytesPI, BytesText, Event};
88
use crate::{ElementWriter, Writer};
99

1010
impl<W: AsyncWrite + Unpin> Writer<W> {
@@ -173,7 +173,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> {
173173
///
174174
/// ```
175175
/// # use quick_xml::writer::Writer;
176-
/// # use quick_xml::events::BytesText;
176+
/// # use quick_xml::events::BytesPI;
177177
/// # use tokio::io::AsyncWriteExt;
178178
/// # #[tokio::main(flavor = "current_thread")] async fn main() {
179179
/// let mut buffer = Vec::new();
@@ -184,9 +184,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> {
184184
/// .create_element("paired")
185185
/// .with_attribute(("attr1", "value1"))
186186
/// .with_attribute(("attr2", "value2"))
187-
/// // NOTE: We cannot use BytesText::new here, because it escapes strings,
188-
/// // but processing instruction content should not be escaped
189-
/// .write_pi_content_async(BytesText::from_escaped(r#"xml-stylesheet href="style.css""#))
187+
/// .write_pi_content_async(BytesPI::new(r#"xml-stylesheet href="style.css""#))
190188
/// .await
191189
/// .expect("cannot write content");
192190
///
@@ -199,7 +197,7 @@ impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> {
199197
/// </paired>"#
200198
/// );
201199
/// # }
202-
pub async fn write_pi_content_async(self, text: BytesText<'_>) -> Result<&'a mut Writer<W>> {
200+
pub async fn write_pi_content_async(self, text: BytesPI<'_>) -> Result<&'a mut Writer<W>> {
203201
self.writer
204202
.write_event_async(Event::Start(self.start_tag.borrow()))
205203
.await?;
@@ -363,7 +361,7 @@ mod tests {
363361

364362
test!(
365363
pi,
366-
Event::PI(BytesText::new("this is a processing instruction")),
364+
Event::PI(BytesPI::new("this is a processing instruction")),
367365
r#"<?this is a processing instruction?>"#
368366
);
369367

0 commit comments

Comments
 (0)