@@ -22,45 +22,46 @@ class RestrictedElement(_etree.ElementBase):
# Declares no additional instance attributes for this class.
__slots__ = ()
# Node types that the iteration helpers in this class leave out of their results.
blacklist = (_etree._Entity, _etree._ProcessingInstruction, _etree._Comment)  # pylint: disable=protected-access
2424
25- def _filter (self , iterator ): # pylint: disable=missing-function-docstring
25+ def _filter (self , iterator ):
26+ """Yield only elements not in the blacklist from the given iterator."""
2627 blacklist = self .blacklist
2728 for child in iterator :
2829 if isinstance (child , blacklist ):
2930 continue
3031 yield child
3132
def __iter__(self):
    """Return the parent class's iterator with blacklisted node types filtered out."""
    return self._filter(super().__iter__())
3536
def iterchildren(self, tag=None, reversed=False):  # pylint: disable=redefined-builtin
    """Return the parent class's ``iterchildren`` iterator minus blacklisted nodes.

    ``tag`` and ``reversed`` are forwarded unchanged as keyword arguments.
    """
    iterator = super().iterchildren(tag=tag, reversed=reversed)
    return self._filter(iterator)
4141
def iter(self, tag=None, *tags):  # pylint: disable=keyword-arg-before-vararg
    """Return the parent class's ``iter`` iterator minus blacklisted nodes.

    ``tag`` and any additional ``tags`` are forwarded to the parent method.
    """
    # Forward ``tag`` positionally. With ``iter(tag=tag, *tags)`` the unpacked
    # positionals are bound first, so the first extra tag would also land on
    # ``tag`` and the call would raise TypeError whenever ``tags`` is non-empty.
    iterator = super().iter(tag, *tags)
    return self._filter(iterator)
4546
def iterdescendants(self, tag=None, *tags):  # pylint: disable=keyword-arg-before-vararg
    """Return the parent class's ``iterdescendants`` iterator minus blacklisted nodes.

    ``tag`` and any additional ``tags`` are forwarded to the parent method.
    """
    # Forward ``tag`` positionally. ``iterdescendants(tag=tag, *tags)`` would
    # bind the first extra tag to ``tag`` too and raise TypeError whenever
    # ``tags`` is non-empty.
    iterator = super().iterdescendants(tag, *tags)
    return self._filter(iterator)
5151
def itersiblings(self, tag=None, preceding=False):
    """Return the parent class's ``itersiblings`` iterator minus blacklisted nodes.

    ``tag`` and ``preceding`` are forwarded unchanged as keyword arguments.
    """
    iterator = super().itersiblings(tag=tag, preceding=preceding)
    return self._filter(iterator)
5756
def getchildren(self):
    """Return a list of the items from the parent class's iterator that are not blacklisted."""
    return list(self._filter(super().__iter__()))
6161
def getiterator(self, tag=None):
    """Return the parent class's ``getiterator(tag)`` iterator minus blacklisted nodes."""
    iterator = super().getiterator(tag)
    return self._filter(iterator)
6566
6667
@@ -73,27 +74,30 @@ class GlobalParserTLS(threading.local):
7374
# Element class installed on newly created default parsers; None skips the lookup setup.
element_class = RestrictedElement
7576
def create_default_parser(self):
    """Build a new ``XMLParser`` configured from ``self.parser_config``.

    When ``self.element_class`` is not None, it is installed as the parser's
    element class through an ``ElementDefaultClassLookup``.

    Returns the newly created parser.
    """
    parser = _etree.XMLParser(**self.parser_config)
    element_class = self.element_class
    # Test the same value that is handed to the lookup (single attribute read).
    if element_class is not None:
        lookup = _etree.ElementDefaultClassLookup(element=element_class)
        parser.set_element_class_lookup(lookup)
    return parser
8385
def set_default_parser(self, parser):
    """Store ``parser`` on this object as its default parser."""
    self._default_parser = parser  # pylint: disable=attribute-defined-outside-init
8689
def get_default_parser(self):
    """Return the stored default parser, creating and storing one if none is set.

    A missing ``_default_parser`` attribute and a stored value of None are
    treated the same way: a parser is built with ``create_default_parser`` and
    saved through ``set_default_parser`` before being returned.
    """
    parser = getattr(self, "_default_parser", None)
    if parser is None:
        parser = self.create_default_parser()
        self.set_default_parser(parser)
    return parser
9397
9498
_parser_tls = GlobalParserTLS()
get_default_parser = _parser_tls.get_default_parser
# Backward-compatible alias: this accessor was previously exported as
# ``getDefaultParser``; keep the old name importable for existing callers.
getDefaultParser = get_default_parser  # pylint: disable=invalid-name
97101
98102
99103def check_docinfo (elementtree , forbid_dtd = False , forbid_entities = True ):
@@ -107,9 +111,7 @@ def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
107111 raise DTDForbidden (docinfo .doctype , docinfo .system_url , docinfo .public_id )
108112 if forbid_entities and not LXML3 :
109113 # lxml < 3 has no iterentities()
110- raise NotSupportedError (
111- "Unable to check for entity declarations in lxml 2.x"
112- ) # pylint: disable=implicit-str-concat
114+ raise NotSupportedError ("Unable to check for entity declarations in lxml 2.x" )
113115
114116 if forbid_entities :
115117 for dtd in docinfo .internalDTD , docinfo .externalDTD :
@@ -119,29 +121,28 @@ def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
119121 raise EntitiesForbidden (entity .name , entity .content , None , None , None , None )
120122
121123
def parse(source, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse ``source`` with ``_etree.parse`` and check the resulting document info.

    When ``parser`` is None the parser from ``get_default_parser()`` is used.
    ``forbid_dtd`` and ``forbid_entities`` are passed on to ``check_docinfo``,
    which runs after parsing. Returns the parsed element tree.
    """
    if parser is None:
        parser = get_default_parser()
    elementtree = _etree.parse(source, parser, base_url=base_url)
    check_docinfo(elementtree, forbid_dtd, forbid_entities)
    return elementtree
130131
131132
def fromstring(text, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse ``text`` with ``_etree.fromstring`` and check the resulting document info.

    When ``parser`` is None the parser from ``get_default_parser()`` is used.
    ``check_docinfo`` is run on the root element's tree with ``forbid_dtd``
    and ``forbid_entities``. Returns the root element (not the tree).
    """
    if parser is None:
        parser = get_default_parser()
    rootelement = _etree.fromstring(text, parser, base_url=base_url)
    check_docinfo(rootelement.getroottree(), forbid_dtd, forbid_entities)
    return rootelement
141141
142142
# ``XML`` is another name for ``fromstring``.
XML = fromstring  # pylint: disable=invalid-name
144144
145145
def iterparse(*args, **kwargs):
    """Always raise ``NotSupportedError``; the arguments are accepted but ignored."""
    raise NotSupportedError("iterparse not available")
0 commit comments