Skip to content

Commit 7821b1b

Browse files
committed
Rewrite the zip tree building algorithm
- Handles the case where the generated ``filelist`` is not in order.
- The outer loop iterates through each file in the list and breaks its full path into segments.
- The inner loop iterates through each segment, adds a new node if the segment is not found among the child nodes of its parent, and updates the node with the file's details if the segment is the last one.
- The full tree is complete after a single pass.
1 parent e8f40ad commit 7821b1b

File tree

1 file changed

+116
-74
lines changed

1 file changed

+116
-74
lines changed

mfr/extensions/zip/render.py

Lines changed: 116 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import os
2-
from typing import List
3-
from zipfile import ZipFile
2+
from typing import List, Union
3+
from zipfile import ZipFile, ZipInfo
44

55
from mako.lookup import TemplateLookup
66

@@ -27,82 +27,108 @@ def render(self):
2727

2828
zip_file = ZipFile(self.file_path, 'r')
2929

30-
file_list = self.sanitize_file_list(zip_file.filelist)
31-
file_tree = self.file_list_to_tree(file_list)
30+
# ``ZipFile.filelist`` contains both files and folders. Use ``obj`` as the generic name for clarity.
31+
obj_list = self.sanitize_obj_list(zip_file.filelist)
32+
obj_tree = self.obj_list_to_tree(obj_list)
3233

33-
return self.TEMPLATE.render(data=file_tree, base=self.assets_url)
34+
return self.TEMPLATE.render(data=obj_tree, base=self.assets_url)
3435

35-
def file_list_to_tree(self, file_list: list) -> List[dict]:
36-
"""Build the file tree and return a "tree".
36+
def obj_list_to_tree(self, obj_list: list) -> List[dict]:
37+
"""Build the object tree from the object list. Each node is represented using a dictionary,
38+
where non-leaf nodes represent folders and leaves represent files. Return a list which
39+
contains only one element: the root node.
3740
38-
TODO: Fix this algorithm
39-
This algorithm only works when the ``file_list`` are in strict alphabetical order. Here is
40-
an example file A.zip where list 1 fails while list 2 succeed.
41-
42-
A.zip
43-
--- A/
44-
--- A/aa.png
45-
--- B/ab.png
46-
47-
File list 1: [ A/, A/B/, A/A/, A/A/aa.png, A/B/ab.png, ]
48-
49-
File list 2: [ A/, A/A/, A/A/aa.png, A/B/, A/B/ab.png, ]
50-
51-
:param file_list: the sanitized file list
41+
:param obj_list: the object list
5242
:rtype: ``List[dict]``
53-
:return: a "tree" in form of a list which contains one dictionary as the root node
43+
:return: a list which contains only one element: the root node.
5444
"""
5545

56-
icons_url = self.assets_url + '/img'
57-
58-
# Build the root of the file tree
59-
tree_root = [{
46+
# Build the root node of the tree
47+
tree_root = {
6048
'text': self.metadata.name + self.metadata.ext,
61-
'icon': icons_url + '/file-ext-zip.png',
49+
'icon': self.assets_url + '/img/file-ext-zip.png',
6250
'children': []
63-
}]
64-
65-
# Iteratively build the file tree for each file and folder.egments.
66-
for file in file_list:
67-
68-
node_path = tree_root[0]
69-
70-
# Split the full path into segments, add each path segment to the tree if the segment
71-
# doesn't already exist. The segments can be either a folder or a file.
72-
paths = [path for path in file.filename.split('/') if path]
73-
for path in paths:
74-
75-
# Add a child to the node
76-
if not len(node_path['children']) or node_path['children'][-1]['text'] != path:
77-
78-
new_node = {'text': path, 'children': []}
79-
80-
date = '%d-%02d-%02d %02d:%02d:%02d' % file.date_time[:6]
81-
size = sizeof_fmt(int(file.file_size)) if file.file_size else ''
82-
new_node['data'] = {'date': date, 'size': size}
51+
}
52+
53+
for obj in obj_list:
54+
55+
# For each object, always start from the root of the tree
56+
parent = tree_root
57+
path_from_root = obj.filename
58+
is_folder = path_from_root[-1] == '/'
59+
path_segments = [segment for segment in path_from_root.split('/') if segment]
60+
last_index = len(path_segments) - 1
61+
62+
# Iterate through the path segments list. Add the segment to tree if not already there
63+
# and update the details with the current object if it is the last one along the path.
64+
for index, segment in enumerate(path_segments):
65+
66+
# Check if the segment has already been added
67+
siblings = parent.get('children', [])
68+
current_node = self.find_node_among_siblings(segment, siblings)
69+
70+
# Found
71+
if current_node:
72+
if index == last_index:
73+
# If it is the last segment, this node must be a folder and represents the
74+
# current object. Update it with the object's info and break.
75+
assert is_folder
76+
self.update_node_with_attributes(current_node, obj, is_folder=is_folder)
77+
break
78+
# Otherwise, jump to the next segment with the current node as the new parent
79+
parent = current_node
80+
continue
81+
82+
# Not found
83+
new_node = {
84+
'text': segment,
85+
'children': [],
86+
}
87+
if index == last_index:
88+
# If it is the last segment, the node represents the current object. Update the
89+
# it with the objects' info, add it to the siblings and break.
90+
self.update_node_with_attributes(new_node, obj, is_folder=is_folder)
91+
siblings.append(new_node)
92+
break
93+
94+
# Otherwise, append the new node to tree, jump to the next segment with the current
95+
# node as the new parent
96+
siblings.append(new_node)
97+
parent = new_node
98+
continue
8399

84-
if file.filename[-1] == '/':
85-
new_node['icon'] = icons_url + '/folder.png'
86-
else:
87-
ext = os.path.splitext(file.filename)[1].lstrip('.')
88-
if ext:
89-
ext = ext.lower()
90-
if self.icon_exists_for_type(ext):
91-
new_node['icon'] = '{}/file-ext-{}.png'.format(icons_url, ext)
92-
else:
93-
new_node['icon'] = '{}/file-ext-generic.png'.format(icons_url)
100+
return [tree_root, ]
94101

95-
node_path['children'].append(new_node)
102+
def update_node_with_attributes(self, node: dict, obj: ZipInfo, is_folder: bool) -> None:
103+
"""Update details (date, size, icon, etc.) of the node with the given object.
96104
97-
node_path = new_node
98-
# Go one level deeper
99-
else:
100-
node_path = node_path['children'][-1]
105+
:param node: the node to update
106+
:param obj: the object that the node represents
107+
:param is_folder: the folder flag
108+
"""
101109

102-
return tree_root
110+
date = '%d-%02d-%02d %02d:%02d:%02d' % obj.date_time[:6]
111+
size = sizeof_fmt(int(obj.file_size)) if obj.file_size else ''
112+
113+
if is_folder:
114+
icon_path = self.assets_url + '/img/folder.png'
115+
else:
116+
ext = (os.path.splitext(obj.filename)[1].lstrip('.')).lower()
117+
if self.icon_exists(ext):
118+
icon_path = '{}/img/file-ext-{}.png'.format(self.assets_url, ext)
119+
else:
120+
icon_path = '{}/img/file-ext-generic.png'.format(self.assets_url)
121+
122+
node.update({
123+
'icon': icon_path,
124+
'data': {
125+
'date': date,
126+
'size': size,
127+
},
128+
})
103129

104130
@staticmethod
105-
def icon_exists_for_type(ext: str) -> bool:
131+
def icon_exists(ext: str) -> bool:
106132
"""Check if an icon exists for the given file type. The extension string is converted to
107133
lower case.
108134
@@ -119,28 +145,44 @@ def icon_exists_for_type(ext: str) -> bool:
119145
))
120146

121147
@staticmethod
122-
def sanitize_file_list(file_list: list) -> list:
148+
def sanitize_obj_list(obj_list: list) -> list:
123149
"""Remove macOS system and temporary files. Current implementation only removes '__MACOSX/'
124150
and '.DS_Store'. If necessary, extend the sanitizer to exclude more file types.
125151
126-
:param file_list: the list of the path for each file and folder in the zip
152+
:param obj_list: a list of ``ZipInfo`` objects, one for each file and folder in the zip
127153
:rtype: ``list``
128154
:return: a sanitized list
129155
"""
130156

131-
sanitized_file_list = []
157+
sanitized_obj_list = []
132158

133-
for file in file_list:
159+
for obj in obj_list:
134160

135-
file_path = file.filename
161+
obj_path = obj.filename
136162
# Ignore macOS '__MACOSX' folder for zip file
137-
if file_path.startswith('__MACOSX/'):
163+
if obj_path.startswith('__MACOSX/'):
138164
continue
139-
140165
# Ignore macOS '.DS_Store' file
141-
if file_path == '.DS_Store' or file_path.endswith('/.DS_Store'):
166+
if obj_path == '.DS_Store' or obj_path.endswith('/.DS_Store'):
142167
continue
143168

144-
sanitized_file_list.append(file)
169+
sanitized_obj_list.append(obj)
170+
171+
return sanitized_obj_list
172+
173+
@staticmethod
174+
def find_node_among_siblings(segment: str, siblings: list) -> Union[dict, None]:
175+
"""Find if the folder or file represented by the path segment has already been added.
176+
177+
:param segment: the path segment
178+
:param siblings: the list containing all added sibling nodes
179+
:rtype: ``Union[dict, None]``
180+
:return: the node if found or ``None`` otherwise
181+
"""
182+
183+
for sibling in siblings:
184+
185+
if sibling.get('text', '') == segment:
186+
return sibling
145187

146-
return sanitized_file_list
188+
return None

0 commit comments

Comments
 (0)