Skip to content

Commit 6a10d75

Browse files
authored
Merge pull request #49 from hhslepicka/fixes-and-features
New Utilities, Improved Docs and Bug Fix
2 parents 0e63174 + 5339c5f commit 6a10d75

File tree

8 files changed

+349
-21
lines changed

8 files changed

+349
-21
lines changed

botcity/web/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from .bot import WebBot, Browser, BROWSER_CONFIGS, By # noqa: F401, F403
2+
from .parsers import table_to_dict, data_from_row, sanitize_header # noqa: F401, F403
3+
from .util import element_as_select # noqa: F401, F403
24

35
from botcity.web._version import get_versions
46
__version__ = get_versions()['version']

botcity/web/bot.py

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import shutil
1111
import time
1212
from typing import List
13+
from contextlib import contextmanager
1314

1415
from botcity.base import BaseBot, State
1516
from botcity.base.utils import only_if_element
@@ -21,7 +22,8 @@
2122
from selenium.webdriver.common.by import By
2223
from selenium.webdriver.common.keys import Keys
2324
from selenium.webdriver.remote.webelement import WebElement
24-
from selenium.webdriver.support.ui import WebDriverWait
25+
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException, NoSuchElementException
26+
from selenium.webdriver.support import expected_conditions as EC
2527

2628
from . import config, cv2find
2729
from .browsers import BROWSER_CONFIGS, Browser
@@ -233,11 +235,16 @@ def check_driver():
233235
def stop_browser(self):
    """
    Stop the browser and reset the driver-related state of the bot.

    Closes the current window, quits the driver and then clears `options`,
    `capabilities` and the driver reference. Does nothing when there is no
    active driver.

    NOTE(review): the previous docstring also mentioned cleaning up the User
    Data Directory; that step is not visible in this method - confirm where
    it happens.

    Warning:
        After invoking this method, you will need to reassign your custom options and capabilities.

    Raises:
        Exception: Anything raised by the driver's `close()` or `quit()` is propagated,
            but only after the remaining shutdown steps have been attempted.
    """
    if not self._driver:
        return
    try:
        try:
            self._driver.close()
        finally:
            # Quit even when closing the current window failed, otherwise the
            # driver session would be left running.
            self._driver.quit()
    finally:
        # Reset unconditionally so a failed shutdown never leaves a dead
        # driver reference (or stale settings) on the bot.
        self.options = None
        self.capabilities = None
        self._driver = None
242249

243250
def set_screen_resolution(self, width=None, height=None):
@@ -854,6 +861,26 @@ def browse(self, url):
854861
"""
855862
self.navigate_to(url)
856863

864+
@contextmanager
def wait_for_new_page(self, waiting_time=10000, activate=True):
    """Context manager to wait for a new page to load and activate it.

    The result of `get_tabs()` is recorded when the `with` block is entered. Once the
    block finishes, `get_tabs()` is polled every 100ms until it differs from the recorded
    value or `waiting_time` has elapsed. On timeout the method returns silently without
    activating anything.

    Args:
        waiting_time (int, optional): The maximum waiting time (ms). Defaults to 10000.
        activate (bool, optional): Whether or not to activate the new page. Defaults to True.

    """
    known_tabs = self.get_tabs()
    yield
    # Monotonic clock: the timeout must not be affected by system clock adjustments.
    start_time = time.monotonic()
    current_tabs = self.get_tabs()
    while current_tabs == known_tabs:
        elapsed_time = (time.monotonic() - start_time) * 1000
        if elapsed_time > waiting_time:
            return None
        time.sleep(0.1)
        current_tabs = self.get_tabs()
    if activate:
        # Pick the tab that was not there before instead of assuming the new
        # one is reported last; fall back to the last entry when no unseen tab
        # exists (e.g. a tab was closed rather than opened).
        new_tabs = [tab for tab in current_tabs if tab not in known_tabs]
        self.activate_tab(new_tabs[-1] if new_tabs else current_tabs[-1])
883+
857884
def execute_javascript(self, code):
858885
"""
859886
Execute the given javascript code.
@@ -1032,15 +1059,19 @@ def wait_for_downloads(self, timeout: int = 120000):
10321059

10331060
wait_method = BROWSER_CONFIGS.get(self.browser).get("wait_for_downloads")
10341061
# waits for all the files to be completed
1035-
WebDriverWait(self._driver, timeout/1000, 1).until(wait_method)
1062+
WebDriverWait(self._driver, timeout/1000.0, 1).until(wait_method)
10361063

1037-
def find_elements(self, selector: str, by: By = By.CSS_SELECTOR) -> List[WebElement]:
1064+
def find_elements(self, selector: str, by: By = By.CSS_SELECTOR,
1065+
waiting_time=10000, ensure_visible: bool = True) -> List[WebElement]:
10381066
"""Find elements using the specified selector with selector type specified by `by`.
10391067
10401068
Args:
10411069
selector (str): The selector string to be used.
10421070
by (str, optional): Selector type. Defaults to By.CSS_SELECTOR.
10431071
[See more](https://selenium-python.readthedocs.io/api.html#selenium.webdriver.common.by.By)
1072+
waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
1073+
Defaults to 10000ms (10s).
1074+
ensure_visible (bool, optional): Whether to wait for the element to be visible. Defaults to True.
10441075
10451076
Returns:
10461077
List[WebElement]: List of elements found.
@@ -1054,16 +1085,36 @@ def find_elements(self, selector: str, by: By = By.CSS_SELECTOR) -> List[WebElem
10541085
...
10551086
```
10561087
"""
1057-
return self._driver.find_elements(by, selector)
1088+
if ensure_visible:
1089+
condition = EC.visibility_of_all_elements_located
1090+
else:
1091+
condition = EC.presence_of_all_elements_located
10581092

1059-
def find_element(self, selector: str, by: str = By.CSS_SELECTOR) -> WebElement:
1093+
try:
1094+
elements = WebDriverWait(
1095+
self._driver, timeout=waiting_time / 1000.0
1096+
).until(
1097+
condition((by, selector))
1098+
)
1099+
return elements
1100+
except (TimeoutException, NoSuchElementException) as ex:
1101+
print("Exception on find_elements", ex)
1102+
return None
1103+
1104+
def find_element(self, selector: str, by: str = By.CSS_SELECTOR, waiting_time=10000,
1105+
ensure_visible: bool = False, ensure_clickable: bool = False) -> WebElement:
10601106
"""Find an element using the specified selector with selector type specified by `by`.
10611107
If more than one element is found, the first instance is returned.
10621108
10631109
Args:
10641110
selector (str): The selector string to be used.
10651111
by (str, optional): Selector type. Defaults to By.CSS_SELECTOR.
10661112
[See more](https://selenium-python.readthedocs.io/api.html#selenium.webdriver.common.by.By)
1113+
waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
1114+
Defaults to 10000ms (10s).
1115+
ensure_visible (bool, optional): Whether to wait for the element to be visible. Defaults to False.
1116+
ensure_clickable (bool, optional): Whether to wait for the element to be clickable. Defaults to False.
1117+
If True, `ensure_clickable` takes precedence over `ensure_visible`.
10671118
10681119
Returns:
10691120
WebElement: The element found.
@@ -1079,9 +1130,47 @@ def find_element(self, selector: str, by: str = By.CSS_SELECTOR) -> WebElement:
10791130
...
10801131
```
10811132
"""
1082-
out = self.find_elements(selector=selector, by=by)
1083-
if out:
1084-
return out[0]
1133+
condition = EC.visibility_of_element_located if ensure_visible else EC.presence_of_element_located
1134+
condition = EC.element_to_be_clickable if ensure_clickable else condition
1135+
1136+
try:
1137+
element = WebDriverWait(
1138+
self._driver, timeout=waiting_time/1000.0
1139+
).until(
1140+
condition((by, selector))
1141+
)
1142+
return element
1143+
except (TimeoutException, NoSuchElementException):
1144+
return None
1145+
1146+
def wait_for_stale_element(self, element: WebElement, timeout: int = 10000):
    """
    Block until the given WebElement becomes stale (outdated) or the timeout expires.

    A timeout is not reported to the caller: the method returns normally either way.

    Args:
        element (WebElement): The element to monitor for staleness.
        timeout (int, optional): Timeout in millis. Defaults to 10000.
    """
    timeout_in_seconds = timeout / 1000.0
    try:
        waiter = WebDriverWait(self._driver, timeout=timeout_in_seconds)
        waiter.until(EC.staleness_of(element))
    except (TimeoutException, NoSuchElementException):
        # Best effort by design: give up quietly when the element never goes stale.
        pass
1158+
1159+
def wait_for_element_visibility(self, element: WebElement, visible: bool = True, waiting_time=10000):
    """Wait until the element is visible, or until it is hidden.

    No exception is handled here, so whatever the underlying wait raises when the
    condition is not met in time (presumably a TimeoutException) reaches the caller.

    Args:
        element (WebElement): The element to wait for.
        visible (bool, optional): Whether to wait for the element to be visible. Defaults to True.
            When False, waits for the element to become invisible instead.
        waiting_time (int, optional): Maximum wait time (ms) for the condition to be met.
            Defaults to 10000ms (10s).
    """
    condition_factory = EC.visibility_of if visible else EC.invisibility_of_element
    timeout_in_seconds = waiting_time / 1000.0
    waiter = WebDriverWait(self._driver, timeout=timeout_in_seconds)
    waiter.until(condition_factory(element))
10851174

10861175
def set_file_input_element(self, element: WebElement, filepath: str):
10871176
"""Configure the filepath for upload in a file element.

botcity/web/parsers.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import collections
2+
import string
3+
from typing import Dict, List
4+
from selenium.webdriver.remote.webelement import WebElement
5+
6+
7+
def data_from_row(row: WebElement, cell_tag="td") -> List[str]:
    """Extract data from a row and return it as a list.

    Args:
        row (WebElement): The row element.
        cell_tag (str, optional): The HTML tag associated with the row cells. Defaults to "td".

    Returns:
        list: List of strings with the text of each matching cell, in the order
            the cells are returned by the lookup.
    """
    # "tag name" is the value of selenium's `By.TAG_NAME`. The generic
    # `find_elements` call is used because the `find_elements_by_*` helpers
    # were removed in Selenium 4.3, while this form works on older versions too.
    cells = row.find_elements("tag name", cell_tag)
    return [cell.text for cell in cells]
20+
21+
22+
def sanitize_header(labels: List[str]):
    """Sanitize header labels.

    Each label is lowercased, has its punctuation removed and its spaces replaced
    with underscores. Labels that are blank, or that end up empty after sanitizing
    (e.g. "#"), become `col_<index>`. Repeated labels receive a numeric suffix
    (`_1`, `_2`, ...) in order of appearance.

    Args:
        labels (List[str]): The raw header labels. The list is updated in place.

    Returns:
        List[str]: The same list object, now holding the sanitized labels.
    """
    drop_punctuation = str.maketrans("", "", string.punctuation)
    for idx, label in enumerate(labels):
        cleaned = ""
        if label.strip():
            cleaned = label.lower().translate(drop_punctuation).replace(" ", "_")
        # A punctuation-only label (e.g. "#") sanitizes to "", which is not a
        # usable key, so it gets the same positional fallback as a blank label.
        labels[idx] = cleaned or f"col_{idx}"

    # Deduplicate by adding _1, _2, _3 to repeated labels
    totals = collections.Counter(labels)
    taken = {label for label, total in totals.items() if total == 1}
    seen = collections.Counter()
    for idx, label in enumerate(labels):
        if totals[label] < 2:
            continue
        seen[label] += 1
        candidate = f"{label}_{seen[label]}"
        # Skip suffixes that would clash with a label that already exists.
        while candidate in taken:
            seen[label] += 1
            candidate = f"{label}_{seen[label]}"
        taken.add(candidate)
        labels[idx] = candidate

    return labels
48+
49+
50+
def table_to_dict(table: WebElement, has_header: bool = True,
                  skip_rows: int = 0, header_tag: str = "th") -> List[Dict]:
    """Convert a table WebElement to a list of dicts, one per data row.

    Args:
        table (WebElement): The table element.
        has_header (bool, optional): Whether or not to parse a header. Defaults to True.
            When False, the keys are `col_0`, `col_1`, ... based on the number of
            `td` cells in the first remaining row.
        skip_rows (int, optional): Number of rows to skip from the top. Defaults to 0.
        header_tag (str, optional): The HTML tag associated with the header cell. Defaults to "th".

    Returns:
        list: List with dict for each row. Empty when the table has no rows left
            after skipping.
    """
    # "tag name" is the value of selenium's `By.TAG_NAME`. The generic
    # `find_elements` call is used because the `find_elements_by_*` helpers
    # were removed in Selenium 4.3, while this form works on older versions too.
    by_tag_name = "tag name"

    # Collect all rows from table
    rows = table.find_elements(by_tag_name, "tr")

    # Skip rows if informed
    if skip_rows:
        rows = rows[skip_rows:]

    # Nothing left to parse: bail out before indexing the first row.
    if not rows:
        return []

    if has_header:
        # Read and sanitize the header labels, then drop the header row.
        labels = sanitize_header(data_from_row(rows[0], cell_tag=header_tag))
        rows = rows[1:]
    else:
        # Make up header labels
        num_cols = len(rows[0].find_elements(by_tag_name, "td"))
        labels = [f"col_{i}" for i in range(num_cols)]

    # zip() stops at the shorter sequence, so cells without a matching label
    # (and labels without a matching cell) are left out of the row dict.
    return [dict(zip(labels, data_from_row(row))) for row in rows]

botcity/web/util.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,31 @@
11
import shutil
2+
import tempfile
23

4+
from selenium.webdriver.remote.webelement import WebElement
5+
from selenium.webdriver.support.select import Select
36

4-
def cleanup_temp_dir(temp_dir):
7+
8+
def cleanup_temp_dir(temp_dir: tempfile.TemporaryDirectory) -> None:
    """
    Remove a temporary directory together with everything inside it.

    Args:
        temp_dir (tempfile.TemporaryDirectory): The temporary directory to delete.
            Falsy values (e.g. None) are ignored.
    """
    if not temp_dir:
        return
    try:
        temp_dir.cleanup()
    except OSError:
        # The regular cleanup failed; fall back to a best-effort removal by path.
        leftover_path = temp_dir.name
        shutil.rmtree(leftover_path, ignore_errors=True)
20+
21+
22+
def element_as_select(element: WebElement) -> Select:
    """Build a selenium `Select` helper around the given WebElement.

    Args:
        element (WebElement): The element to wrap.

    Returns:
        Select: The Select object created from `element`.
    """
    select = Select(element)
    return select

docs/forms.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Interacting with Forms
2+
3+
When dealing with forms, we often need to fill in the form and submit it.
4+
5+
While most of the operations are trivial, some are not, such as choosing an option in a select element or dealing with file uploads.
6+
7+
For those cases, we developed some utility functions that you can use.
8+
9+
## Select Element
10+
11+
After grabbing the element via the `find_element` or `find_elements` functions, we can use the `element_as_select` function to convert it into a `Select` object.
12+
13+
::: botcity.web.util.element_as_select
14+
15+
### Example usage
16+
17+
```python
18+
# Import the function
19+
from botcity.web.util import element_as_select
20+
...
21+
# Fetch the select element
22+
element = self.find_element("select", By.TAG_NAME)
23+
# Convert the element into a Select object
24+
select_element = element_as_select(element)
25+
# Select the option based on visible text
26+
select_element.select_by_visible_text("Option 1")
27+
...
28+
```
29+
30+
## File Upload
31+
32+
After grabbing the element via the `find_element` or `find_elements` functions, we can use the `set_file_input_element` method to assign the file path to the element.
33+
34+
### Example usage
35+
36+
```python
37+
from botcity.web import By
38+
...
39+
# Find the input element of type `file` using CSS_SELECTOR.
40+
elem = self.find_element("body > form > input[type=file]", By.CSS_SELECTOR)
41+
# Configure the file to be used when processing the upload
42+
self.set_file_input_element(elem, "./test.txt")
43+
...
44+
```

0 commit comments

Comments
 (0)