From 2b1a49486779e73bc27b5f8ea51aff9921277d49 Mon Sep 17 00:00:00 2001 From: Megh-Thakkar Date: Mon, 25 Aug 2025 01:09:37 -0400 Subject: [PATCH 1/2] maximum value mode in validate --- src/browsergym/workarena/tasks/dashboard.py | 231 ++++++++++++++------ 1 file changed, 163 insertions(+), 68 deletions(-) diff --git a/src/browsergym/workarena/tasks/dashboard.py b/src/browsergym/workarena/tasks/dashboard.py index 68000d0..113b55a 100644 --- a/src/browsergym/workarena/tasks/dashboard.py +++ b/src/browsergym/workarena/tasks/dashboard.py @@ -1,27 +1,24 @@ import json import logging -import numpy as np -import playwright.sync_api import re - from abc import ABC, abstractmethod -from tenacity import retry, stop_after_attempt, wait_fixed from typing import List, Tuple -from urllib import parse -from .base import AbstractServiceNowTask -from .comp_building_block import CompositionalBuildingBlockTask -from .utils.utils import check_url_suffix_match +import numpy as np +import playwright.sync_api +from tenacity import retry, stop_after_attempt, wait_fixed from ..api.utils import table_api_call, table_column_info from ..config import ( DASHBOARD_RETRIEVAL_MINMAX_CONFIG_PATH, DASHBOARD_RETRIEVAL_VALUE_CONFIG_PATH, + REPORT_PATCH_FLAG, REPORT_RETRIEVAL_MINMAX_CONFIG_PATH, REPORT_RETRIEVAL_VALUE_CONFIG_PATH, - REPORT_PATCH_FLAG, ) from ..instance import SNowInstance +from .base import AbstractServiceNowTask +from .comp_building_block import CompositionalBuildingBlockTask from .utils.string import share_tri_gram from .utils.utils import check_url_suffix_match @@ -46,7 +43,11 @@ class DashboardRetrievalTask(AbstractServiceNowTask, ABC): """ def __init__( - self, seed: int = None, instance: SNowInstance = None, fixed_config: dict = None, **kwargs + self, + seed: int = None, + instance: SNowInstance = None, + fixed_config: dict = None, + **kwargs, ) -> None: super().__init__(seed=seed, instance=instance, start_rel_url="") self.iframe_id = "gsft_main" @@ -83,7 +84,12 @@ def _get_charts(self, page: playwright.sync_api.Page) -> None: f"{self.iframe_id}.Highcharts.charts.map((x) => {{if(x){{return [x.renderTo.ariaLabel, x.renderTo.id];}}}})" ) charts = [ - (title.replace("Highcharts interactive chart.", "").replace(".", "").strip(), id) + ( + title.replace("Highcharts interactive chart.", "") + .replace(".", "") + .strip(), + id, + ) for title, id in charts if title and iframe.locator(f"#{id}").count() @@ -118,7 +124,9 @@ def _read_chart(self, page: playwright.sync_api.Page, element_id: str) -> str: f"{self.iframe_id}.Highcharts.charts.find(chart => chart && chart.renderTo.id === '{element_id}').types" ) if len(set(types)) > 1: - raise NotImplementedError("Multiple chart types in the same chart not supported") + raise NotImplementedError( + "Multiple chart types in the same chart not supported" + ) type = types[0] if type not in SUPPORTED_PLOT_TYPES: raise NotImplementedError(f"Chart type {type} not supported") @@ -159,7 +167,9 @@ def _read_chart(self, page: playwright.sync_api.Page, element_id: str) -> str: data_point["label_name"].strip() if data_point["label_name"] else "" ) data_point["label_origx"] = ( - data_point["label_origx"].strip() if data_point["label_origx"] else "" + data_point["label_origx"].strip() + if data_point["label_origx"] + else "" ) # Determine which label to use (this is a heuristic) @@ -222,7 +232,9 @@ def _get_chart_by_title( chart_idx = [title.lower() for title, _ in charts].index(title.lower()) # Load chart data - return *self._read_chart(page, element_id=charts[chart_idx][1]), charts[chart_idx][1] + return *self._read_chart(page, element_id=charts[chart_idx][1]), charts[ + chart_idx + ][1] def _wait_for_ready(self, page: playwright.sync_api.Page) -> None: """ @@ -245,59 +257,61 @@ def _wait_for_ready(self, page: playwright.sync_api.Page) -> None: logging.debug("Detected Highcharts API ready") logging.debug("Waiting for all plots to be loaded available") - page.wait_for_function(f"window.{self.iframe_id}.WORKARENA_HIGHCHARTS_ALL_LOADED") + page.wait_for_function( + f"window.{self.iframe_id}.WORKARENA_HIGHCHARTS_ALL_LOADED" + ) logging.debug("All plots loaded") def get_init_scripts(self) -> List[str]: return super().get_init_scripts() + [ "registerGsftMainLoaded();", - f""" - async function renderAllCharts() {{ + """ + async function renderAllCharts() { waLog('Forcing load of all charts', 'loadAllCharts'); await waitForCondition(() => window.WORKARENA_LOAD_COMPLETE, 100); const canvas = window.SNC.canvas; - if (canvas) {{ + if (canvas) { waLog('This is a dashboard page.', 'loadAllCharts'); // Trigger the rendering of each widget canvas.layoutJson.panes.forEach((p) => canvas.canvasUtils.renderSlowWidget(canvas.canvasUtils.getWidgetContainer(p.uuid))); // Wait for all widgets to be rendered await waitForCondition(() => window.SNC.canvas.layoutJson.panes.map((p) => p.isRendered).every(value => value == true), 100); - }} - else {{ + } + else { waLog('This is a report page.', 'loadAllCharts'); // Wait for axes to be visible (we need to use this approach since there is no canvas to help us) await waitForCondition(() => document.body.innerText.toLowerCase().includes("no data to display") || document.querySelectorAll(".highcharts-point").length > 0, 100); - }} + } // Wait for Highcharts to say that the charts are rendered waitForCondition(() => Highcharts.charts.all((c) => c.hasLoaded), 100) - .then(() => {{ + .then(() => { window.WORKARENA_HIGHCHARTS_ALL_LOADED = true; waLog('All charts loaded', 'loadAllCharts'); - }}); - }} + }); + } // Run on both dashboard and reports pages runInGsftMainOnlyAndProtectByURL(renderAllCharts, 'pa_dashboard.do'); runInGsftMainOnlyAndProtectByURL(renderAllCharts, 'sys_report_template.do'); """, - f""" - function purifyReportUIButtons() {{ + """ + function purifyReportUIButtons() { // Delete a lot of UI features that were causing issues due to the report refreshing without // reloading the page. This makes the task easier, but it doesn't matter because we really // want to evaluate retrieval and this doesn't prevent that. - document.querySelectorAll('[ng-click*="main.runReport"], #sidebar, #nlq-over-cb, #open-tree-navigation-button, .data-filtering-wrap').forEach(element => {{ - if (element && element.parentNode) {{ + document.querySelectorAll('[ng-click*="main.runReport"], #sidebar, #nlq-over-cb, #open-tree-navigation-button, .data-filtering-wrap').forEach(element => { + if (element && element.parentNode) { element.parentNode.removeChild(element); - }} - }}); - document.addEventListener('click', function(event) {{ + } + }); + document.addEventListener('click', function(event) { event.stopPropagation(); event.preventDefault(); - }}, true); + }, true); waLog('Purified report UI.', 'purifyReportUIButtons'); - }} + } // Run it only on the reports page runInGsftMainOnlyAndProtectByURL(purifyReportUIButtons, 'sys_report_template.do'); """, @@ -315,7 +329,9 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: # Configure task # ... sample a configuration self.config = ( - self.fixed_config if self.fixed_config else self.random.choice(self.all_configs()) + self.fixed_config + if self.fixed_config + else self.random.choice(self.all_configs()) ) # ... set start URL based on config # ...... some of the reports have need a date filter to be applied so we do this by patching a placeholder in the URL @@ -325,11 +341,11 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: # Produce goal string based on question type chart_locator = ( - f"the \"{self.config['chart_series']}\" series of " + f'the "{self.config["chart_series"]}" series of ' if self.config["chart_series"] else "" ) + ( - f"the \"{self.config['chart_title']}\" chart" + f'the "{self.config["chart_title"]}" chart' if self.config["chart_title"] else "the chart" ) @@ -347,7 +363,9 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: elif self.config["question"] == "mode": goal = f"What is the mode value in {chart_locator}?" else: - raise NotImplementedError(f"Question type {self.config['question']} not supported") + raise NotImplementedError( + f"Question type {self.config['question']} not supported" + ) return goal, {} @@ -362,7 +380,9 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non # Open the report frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame() # Search for the report by title - frame.get_by_label("Search a specific field of the Reports list").select_option("Title") + frame.get_by_label( + "Search a specific field of the Reports list" + ).select_option("Title") search_input = frame.locator('input[aria-label="Search"]') search_input.click() search_input.fill(chart_title) @@ -371,7 +391,9 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non "typeof window.gsft_main !== 'undefined' && window.gsft_main.WORKARENA_LOAD_COMPLETE" ) # Click on the chart preview to open it - frame.wait_for_selector(f'a[aria-label="Preview record: {chart_title}"]').click() + frame.wait_for_selector( + f'a[aria-label="Preview record: {chart_title}"]' + ).click() page.wait_for_timeout(1000) page.keyboard.press("Enter") # Now in the form view, wait for the page to load and click to view the report @@ -417,21 +439,31 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non elif self.config["question"] == "max": max_point = max(chart_data, key=lambda x: x["count"]) chat_messages.append( - {"message": f"{max_point['label']}, {max_point['count']}", "role": "assistant"} + { + "message": f"{max_point['label']}, {max_point['count']}", + "role": "assistant", + } ) elif self.config["question"] == "min": min_point = min(chart_data, key=lambda x: x["count"]) chat_messages.append( - {"message": f"{min_point['label']}, {min_point['count']}", "role": "assistant"} + { + "message": f"{min_point['label']}, {min_point['count']}", + "role": "assistant", + } ) elif self.config["question"] == "mean": counts = [data["count"] for data in chart_data] target_count = np.mean(counts) - chat_messages.append({"message": f"Mean / Average {target_count}", "role": "assistant"}) + chat_messages.append( + {"message": f"Mean / Average {target_count}", "role": "assistant"} + ) elif self.config["question"] == "median": counts = [data["count"] for data in chart_data] target_count = np.median(counts) - chat_messages.append({"message": f"Median {target_count}", "role": "assistant"}) + chat_messages.append( + {"message": f"Median {target_count}", "role": "assistant"} + ) elif self.config["question"] == "mode": counts = [data["count"] for data in chart_data] # We select the maximum value if there are two or more modes @@ -454,9 +486,13 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non if frequency == max_frequency ] target_count = max(max_frequencies) - chat_messages.append({"message": f"Mode {target_count}", "role": "assistant"}) + chat_messages.append( + {"message": f"Mode {target_count}", "role": "assistant"} + ) else: - raise NotImplementedError(f"Question type \"{self.config['question']}\" not supported") + raise NotImplementedError( + f'Question type "{self.config["question"]}" not supported' + ) def validate( self, page: playwright.sync_api.Page, chat_messages: list[str] @@ -514,7 +550,10 @@ def validate( response_ = response_.replace(label, "") # ... then we extract numbers response_floats = np.unique( - [float(x) for x in re.findall(r"[\d]+(?:[.,]\d+)?", response_.replace(",", ""))] + [ + float(x) + for x in re.findall(r"[\d]+(?:[.,]\d+)?", response_.replace(",", "")) + ] ) del response_ @@ -532,7 +571,9 @@ def validate( logging.debug("The question is a value question") # if more than one number is in the prompt, there is necessarily a false positive if len(response_floats) > 1: - error_msg = "Incorrect answer. More than one number detected in the response." + error_msg = ( + "Incorrect answer. More than one number detected in the response." + ) return 0.0, True, error_msg, {"message": error_msg} logging.debug( @@ -550,9 +591,14 @@ def validate( ][0] ) if np.isclose(expected_value, response_floats[0]): - return 1.0, True, "Nice work, thank you!", {"message": "Correct answer."} + return ( + 1.0, + True, + "Nice work, thank you!", + {"message": "Correct answer."}, + ) else: - return 0.0, True, f"Incorrect answer.", {"message": "Incorrect answer."} + return 0.0, True, "Incorrect answer.", {"message": "Incorrect answer."} # ... validate max/min responses elif "max" in self.config["question"] or "min" in self.config["question"]: @@ -561,14 +607,20 @@ def validate( logging.debug(f"The question is a {str(target_func)} question") # Get the target count value (max or min) - target_count = float(target_func(chart_data, key=lambda x: x["count"])["count"]) + target_count = float( + target_func(chart_data, key=lambda x: x["count"])["count"] + ) # Find all points with the target count value - target_points = [point for point in chart_data if point["count"] == target_count] + target_points = [ + point for point in chart_data if point["count"] == target_count + ] # if more than one number is in the prompt, there is necessarily a false positive if len(response_floats) > 1: - error_msg = "Incorrect answer. More than one number detected in the response." + error_msg = ( + "Incorrect answer. More than one number detected in the response." + ) return 0.0, True, error_msg, {"message": error_msg} # Check if any of these points are mentioned in the response @@ -576,7 +628,12 @@ def validate( if point["label"].lower() in response.lower() and np.isclose( target_count, response_floats[0] ): - return 1.0, True, "Nice work, thank you!", {"message": "Correct answer."} + return ( + 1.0, + True, + "Nice work, thank you!", + {"message": "Correct answer."}, + ) # If no correct point is mentioned in the response return 0.0, True, "Incorrect answer.", {"message": "Incorrect answer."} @@ -592,24 +649,50 @@ def validate( elif self.config["question"] == "median": target_count = np.median(counts) elif self.config["question"] == "mode": - _vals, _counts = np.unique(counts, return_counts=True) - max_frequency_index = np.argmax(_counts) - target_count = -_vals[max_frequency_index] + # We select the maximum value if there are two or more modes + frequencies = {} + for count in counts: + if count not in frequencies: + frequencies[count] = 1 + else: + frequencies[count] += 1 + sorted_frequencies = { + count: frequency + for count, frequency in sorted( + frequencies.items(), key=lambda item: item[1], reverse=True + ) + } + max_frequency = list(sorted_frequencies.values())[0] + max_frequencies = [ + count + for count, frequency in sorted_frequencies.items() + if frequency == max_frequency + ] + target_count = max(max_frequencies) # if more than one number is in the prompt, there is necessarily a false positive if len(response_floats) > 1: - error_msg = "Incorrect answer. More than one number detected in the response." + error_msg = ( + "Incorrect answer. More than one number detected in the response." + ) return 0.0, True, error_msg, {"message": error_msg} # Check if any of these points are mentioned in the response if np.isclose(target_count, response_floats[0]): - return 1.0, True, "Nice work, thank you!", {"message": "Correct answer."} + return ( + 1.0, + True, + "Nice work, thank you!", + {"message": "Correct answer."}, + ) # If no correct point is mentioned in the response return 0.0, True, "Incorrect answer.", {"message": "Incorrect answer."} else: - raise NotImplementedError(f"Question type \"{self.config['question']}\" not supported") + raise NotImplementedError( + f'Question type "{self.config["question"]}" not supported' + ) def teardown(self) -> None: return super().teardown() @@ -654,7 +737,9 @@ def _generate_random_config( ]: cols = [ x - for x, y in table_column_info(instance=self.instance, table=table).items() + for x, y in table_column_info( + instance=self.instance, table=table + ).items() if y.get("cangroup", False) and y.get("type", None) == "choice" and "upon" not in x.lower() @@ -721,16 +806,22 @@ def _generate_random_config( # Handle the case where a dashboard is not found page.wait_for_load_state("networkidle") iframe = page.frame(name=self.iframe_id) - assert iframe.get_by_text("not found").count() == 0, "Report or dashboard not found" + assert iframe.get_by_text("not found").count() == 0, ( + "Report or dashboard not found" + ) # Find all the charts self._wait_for_ready(page) charts = self._get_charts(page) # Randomly select a chart - assert len(charts) > 0, f"No charts found on the page {self.instance.snow_url}{url}" + assert len(charts) > 0, ( + f"No charts found on the page {self.instance.snow_url}{url}" + ) chart_idx = self.random.randint(0, len(charts)) - chart_title = charts[chart_idx][0] if not is_report else "" # No title for reports + chart_title = ( + charts[chart_idx][0] if not is_report else "" + ) # No title for reports _, chart_data, _ = self._get_chart_by_title(page, chart_title) # Select a series randomly @@ -740,10 +831,12 @@ def _generate_random_config( # Check if the data is interesting labels = [point["label"] for point in chart_data] - assert len(labels) > 1, f"Not enough data in the chart (only {len(labels)} label)" - assert not any( - l.isdigit() for l in labels - ), "Some chart labels are digits, which would cause errors in validation. Skipping." + assert len(labels) > 1, ( + f"Not enough data in the chart (only {len(labels)} label)" + ) + assert not any(label.isdigit() for label in labels), ( + "Some chart labels are digits, which would cause errors in validation. Skipping." + ) # Sample a type of question question = self.random.choice(question_types) @@ -809,7 +902,9 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: # Configure task # ... sample a configuration self.config = ( - self.fixed_config if self.fixed_config else self.random.choice(self.all_configs()) + self.fixed_config + if self.fixed_config + else self.random.choice(self.all_configs()) ) # ... set start URL based on config self.start_url = self.instance.snow_url + self.config["url"] From 36f2f58ca66cb8fdb9bcbe8c1fcf9f8af187b36b Mon Sep 17 00:00:00 2001 From: Megh Thakkar Date: Mon, 25 Aug 2025 01:29:05 -0400 Subject: [PATCH 2/2] Revert ruff formatting for clarity --- src/browsergym/workarena/tasks/dashboard.py | 208 ++++++-------------- 1 file changed, 65 insertions(+), 143 deletions(-) diff --git a/src/browsergym/workarena/tasks/dashboard.py b/src/browsergym/workarena/tasks/dashboard.py index 113b55a..1f1c611 100644 --- a/src/browsergym/workarena/tasks/dashboard.py +++ b/src/browsergym/workarena/tasks/dashboard.py @@ -1,24 +1,27 @@ import json import logging +import numpy as np +import playwright.sync_api import re + from abc import ABC, abstractmethod +from tenacity import retry, stop_after_attempt, wait_fixed from typing import List, Tuple +from urllib import parse -import numpy as np -import playwright.sync_api -from tenacity import retry, stop_after_attempt, wait_fixed +from .base import AbstractServiceNowTask +from .comp_building_block import CompositionalBuildingBlockTask +from .utils.utils import check_url_suffix_match from ..api.utils import table_api_call, table_column_info from ..config import ( DASHBOARD_RETRIEVAL_MINMAX_CONFIG_PATH, DASHBOARD_RETRIEVAL_VALUE_CONFIG_PATH, - REPORT_PATCH_FLAG, REPORT_RETRIEVAL_MINMAX_CONFIG_PATH, REPORT_RETRIEVAL_VALUE_CONFIG_PATH, + REPORT_PATCH_FLAG, ) from ..instance import SNowInstance -from .base import AbstractServiceNowTask -from .comp_building_block import CompositionalBuildingBlockTask from .utils.string import share_tri_gram from .utils.utils import check_url_suffix_match @@ -43,11 +46,7 @@ class DashboardRetrievalTask(AbstractServiceNowTask, ABC): """ def __init__( - self, - seed: int = None, - instance: SNowInstance = None, - fixed_config: dict = None, - **kwargs, + self, seed: int = None, instance: SNowInstance = None, fixed_config: dict = None, **kwargs ) -> None: super().__init__(seed=seed, instance=instance, start_rel_url="") self.iframe_id = "gsft_main" @@ -84,12 +83,7 @@ def _get_charts(self, page: playwright.sync_api.Page) -> None: f"{self.iframe_id}.Highcharts.charts.map((x) => {{if(x){{return [x.renderTo.ariaLabel, x.renderTo.id];}}}})" ) charts = [ - ( - title.replace("Highcharts interactive chart.", "") - .replace(".", "") - .strip(), - id, - ) + (title.replace("Highcharts interactive chart.", "").replace(".", "").strip(), id) for title, id in charts if title and iframe.locator(f"#{id}").count() @@ -124,9 +118,7 @@ def _read_chart(self, page: playwright.sync_api.Page, element_id: str) -> str: f"{self.iframe_id}.Highcharts.charts.find(chart => chart && chart.renderTo.id === '{element_id}').types" ) if len(set(types)) > 1: - raise NotImplementedError( - "Multiple chart types in the same chart not supported" - ) + raise NotImplementedError("Multiple chart types in the same chart not supported") type = types[0] if type not in SUPPORTED_PLOT_TYPES: raise NotImplementedError(f"Chart type {type} not supported") @@ -167,9 +159,7 @@ def _read_chart(self, page: playwright.sync_api.Page, element_id: str) -> str: data_point["label_name"].strip() if data_point["label_name"] else "" ) data_point["label_origx"] = ( - data_point["label_origx"].strip() - if data_point["label_origx"] - else "" + data_point["label_origx"].strip() if data_point["label_origx"] else "" ) # Determine which label to use (this is a heuristic) @@ -232,9 +222,7 @@ def _get_chart_by_title( chart_idx = [title.lower() for title, _ in charts].index(title.lower()) # Load chart data - return *self._read_chart(page, element_id=charts[chart_idx][1]), charts[ - chart_idx - ][1] + return *self._read_chart(page, element_id=charts[chart_idx][1]), charts[chart_idx][1] def _wait_for_ready(self, page: playwright.sync_api.Page) -> None: """ @@ -257,61 +245,59 @@ def _wait_for_ready(self, page: playwright.sync_api.Page) -> None: logging.debug("Detected Highcharts API ready") logging.debug("Waiting for all plots to be loaded available") - page.wait_for_function( - f"window.{self.iframe_id}.WORKARENA_HIGHCHARTS_ALL_LOADED" - ) + page.wait_for_function(f"window.{self.iframe_id}.WORKARENA_HIGHCHARTS_ALL_LOADED") logging.debug("All plots loaded") def get_init_scripts(self) -> List[str]: return super().get_init_scripts() + [ "registerGsftMainLoaded();", - """ - async function renderAllCharts() { + f""" + async function renderAllCharts() {{ waLog('Forcing load of all charts', 'loadAllCharts'); await waitForCondition(() => window.WORKARENA_LOAD_COMPLETE, 100); const canvas = window.SNC.canvas; - if (canvas) { + if (canvas) {{ waLog('This is a dashboard page.', 'loadAllCharts'); // Trigger the rendering of each widget canvas.layoutJson.panes.forEach((p) => canvas.canvasUtils.renderSlowWidget(canvas.canvasUtils.getWidgetContainer(p.uuid))); // Wait for all widgets to be rendered await waitForCondition(() => window.SNC.canvas.layoutJson.panes.map((p) => p.isRendered).every(value => value == true), 100); - } - else { + }} + else {{ waLog('This is a report page.', 'loadAllCharts'); // Wait for axes to be visible (we need to use this approach since there is no canvas to help us) await waitForCondition(() => document.body.innerText.toLowerCase().includes("no data to display") || document.querySelectorAll(".highcharts-point").length > 0, 100); - } + }} // Wait for Highcharts to say that the charts are rendered waitForCondition(() => Highcharts.charts.all((c) => c.hasLoaded), 100) - .then(() => { + .then(() => {{ window.WORKARENA_HIGHCHARTS_ALL_LOADED = true; waLog('All charts loaded', 'loadAllCharts'); - }); - } + }}); + }} // Run on both dashboard and reports pages runInGsftMainOnlyAndProtectByURL(renderAllCharts, 'pa_dashboard.do'); runInGsftMainOnlyAndProtectByURL(renderAllCharts, 'sys_report_template.do'); """, - """ - function purifyReportUIButtons() { + f""" + function purifyReportUIButtons() {{ // Delete a lot of UI features that were causing issues due to the report refreshing without // reloading the page. This makes the task easier, but it doesn't matter because we really // want to evaluate retrieval and this doesn't prevent that. - document.querySelectorAll('[ng-click*="main.runReport"], #sidebar, #nlq-over-cb, #open-tree-navigation-button, .data-filtering-wrap').forEach(element => { - if (element && element.parentNode) { + document.querySelectorAll('[ng-click*="main.runReport"], #sidebar, #nlq-over-cb, #open-tree-navigation-button, .data-filtering-wrap').forEach(element => {{ + if (element && element.parentNode) {{ element.parentNode.removeChild(element); - } - }); - document.addEventListener('click', function(event) { + }} + }}); + document.addEventListener('click', function(event) {{ event.stopPropagation(); event.preventDefault(); - }, true); + }}, true); waLog('Purified report UI.', 'purifyReportUIButtons'); - } + }} // Run it only on the reports page runInGsftMainOnlyAndProtectByURL(purifyReportUIButtons, 'sys_report_template.do'); """, @@ -329,9 +315,7 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: # Configure task # ... sample a configuration self.config = ( - self.fixed_config - if self.fixed_config - else self.random.choice(self.all_configs()) + self.fixed_config if self.fixed_config else self.random.choice(self.all_configs()) ) # ... set start URL based on config # ...... some of the reports have need a date filter to be applied so we do this by patching a placeholder in the URL @@ -341,11 +325,11 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: # Produce goal string based on question type chart_locator = ( - f'the "{self.config["chart_series"]}" series of ' + f"the \"{self.config['chart_series']}\" series of " if self.config["chart_series"] else "" ) + ( - f'the "{self.config["chart_title"]}" chart' + f"the \"{self.config['chart_title']}\" chart" if self.config["chart_title"] else "the chart" ) @@ -363,9 +347,7 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: elif self.config["question"] == "mode": goal = f"What is the mode value in {chart_locator}?" else: - raise NotImplementedError( - f"Question type {self.config['question']} not supported" - ) + raise NotImplementedError(f"Question type {self.config['question']} not supported") return goal, {} @@ -380,9 +362,7 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non # Open the report frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame() # Search for the report by title - frame.get_by_label( - "Search a specific field of the Reports list" - ).select_option("Title") + frame.get_by_label("Search a specific field of the Reports list").select_option("Title") search_input = frame.locator('input[aria-label="Search"]') search_input.click() search_input.fill(chart_title) @@ -391,9 +371,7 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non "typeof window.gsft_main !== 'undefined' && window.gsft_main.WORKARENA_LOAD_COMPLETE" ) # Click on the chart preview to open it - frame.wait_for_selector( - f'a[aria-label="Preview record: {chart_title}"]' - ).click() + frame.wait_for_selector(f'a[aria-label="Preview record: {chart_title}"]').click() page.wait_for_timeout(1000) page.keyboard.press("Enter") # Now in the form view, wait for the page to load and click to view the report @@ -439,31 +417,21 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non elif self.config["question"] == "max": max_point = max(chart_data, key=lambda x: x["count"]) chat_messages.append( - { - "message": f"{max_point['label']}, {max_point['count']}", - "role": "assistant", - } + {"message": f"{max_point['label']}, {max_point['count']}", "role": "assistant"} ) elif self.config["question"] == "min": min_point = min(chart_data, key=lambda x: x["count"]) chat_messages.append( - { - "message": f"{min_point['label']}, {min_point['count']}", - "role": "assistant", - } + {"message": f"{min_point['label']}, {min_point['count']}", "role": "assistant"} ) elif self.config["question"] == "mean": counts = [data["count"] for data in chart_data] target_count = np.mean(counts) - chat_messages.append( - {"message": f"Mean / Average {target_count}", "role": "assistant"} - ) + chat_messages.append({"message": f"Mean / Average {target_count}", "role": "assistant"}) elif self.config["question"] == "median": counts = [data["count"] for data in chart_data] target_count = np.median(counts) - chat_messages.append( - {"message": f"Median {target_count}", "role": "assistant"} - ) + chat_messages.append({"message": f"Median {target_count}", "role": "assistant"}) elif self.config["question"] == "mode": counts = [data["count"] for data in chart_data] # We select the maximum value if there are two or more modes @@ -486,13 +454,9 @@ def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> Non if frequency == max_frequency ] target_count = max(max_frequencies) - chat_messages.append( - {"message": f"Mode {target_count}", "role": "assistant"} - ) + chat_messages.append({"message": f"Mode {target_count}", "role": "assistant"}) else: - raise NotImplementedError( - f'Question type "{self.config["question"]}" not supported' - ) + raise NotImplementedError(f"Question type \"{self.config['question']}\" not supported") def validate( self, page: playwright.sync_api.Page, chat_messages: list[str] @@ -550,10 +514,7 @@ def validate( response_ = response_.replace(label, "") # ... then we extract numbers response_floats = np.unique( - [ - float(x) - for x in re.findall(r"[\d]+(?:[.,]\d+)?", response_.replace(",", "")) - ] + [float(x) for x in re.findall(r"[\d]+(?:[.,]\d+)?", response_.replace(",", ""))] ) del response_ @@ -571,9 +532,7 @@ def validate( logging.debug("The question is a value question") # if more than one number is in the prompt, there is necessarily a false positive if len(response_floats) > 1: - error_msg = ( - "Incorrect answer. More than one number detected in the response." - ) + error_msg = "Incorrect answer. More than one number detected in the response." return 0.0, True, error_msg, {"message": error_msg} logging.debug( @@ -591,14 +550,9 @@ def validate( ][0] ) if np.isclose(expected_value, response_floats[0]): - return ( - 1.0, - True, - "Nice work, thank you!", - {"message": "Correct answer."}, - ) + return 1.0, True, "Nice work, thank you!", {"message": "Correct answer."} else: - return 0.0, True, "Incorrect answer.", {"message": "Incorrect answer."} + return 0.0, True, f"Incorrect answer.", {"message": "Incorrect answer."} # ... validate max/min responses elif "max" in self.config["question"] or "min" in self.config["question"]: @@ -607,20 +561,14 @@ def validate( logging.debug(f"The question is a {str(target_func)} question") # Get the target count value (max or min) - target_count = float( - target_func(chart_data, key=lambda x: x["count"])["count"] - ) + target_count = float(target_func(chart_data, key=lambda x: x["count"])["count"]) # Find all points with the target count value - target_points = [ - point for point in chart_data if point["count"] == target_count - ] + target_points = [point for point in chart_data if point["count"] == target_count] # if more than one number is in the prompt, there is necessarily a false positive if len(response_floats) > 1: - error_msg = ( - "Incorrect answer. More than one number detected in the response." - ) + error_msg = "Incorrect answer. More than one number detected in the response." return 0.0, True, error_msg, {"message": error_msg} # Check if any of these points are mentioned in the response @@ -628,12 +576,7 @@ def validate( if point["label"].lower() in response.lower() and np.isclose( target_count, response_floats[0] ): - return ( - 1.0, - True, - "Nice work, thank you!", - {"message": "Correct answer."}, - ) + return 1.0, True, "Nice work, thank you!", {"message": "Correct answer."} # If no correct point is mentioned in the response return 0.0, True, "Incorrect answer.", {"message": "Incorrect answer."} @@ -672,27 +615,18 @@ def validate( # if more than one number is in the prompt, there is necessarily a false positive if len(response_floats) > 1: - error_msg = ( - "Incorrect answer. More than one number detected in the response." - ) + error_msg = "Incorrect answer. More than one number detected in the response." return 0.0, True, error_msg, {"message": error_msg} # Check if any of these points are mentioned in the response if np.isclose(target_count, response_floats[0]): - return ( - 1.0, - True, - "Nice work, thank you!", - {"message": "Correct answer."}, - ) + return 1.0, True, "Nice work, thank you!", {"message": "Correct answer."} # If no correct point is mentioned in the response return 0.0, True, "Incorrect answer.", {"message": "Incorrect answer."} else: - raise NotImplementedError( - f'Question type "{self.config["question"]}" not supported' - ) + raise NotImplementedError(f"Question type \"{self.config['question']}\" not supported") def teardown(self) -> None: return super().teardown() @@ -737,9 +671,7 @@ def _generate_random_config( ]: cols = [ x - for x, y in table_column_info( - instance=self.instance, table=table - ).items() + for x, y in table_column_info(instance=self.instance, table=table).items() if y.get("cangroup", False) and y.get("type", None) == "choice" and "upon" not in x.lower() @@ -806,22 +738,16 @@ def _generate_random_config( # Handle the case where a dashboard is not found page.wait_for_load_state("networkidle") iframe = page.frame(name=self.iframe_id) - assert iframe.get_by_text("not found").count() == 0, ( - "Report or dashboard not found" - ) + assert iframe.get_by_text("not found").count() == 0, "Report or dashboard not found" # Find all the charts self._wait_for_ready(page) charts = self._get_charts(page) # Randomly select a chart - assert len(charts) > 0, ( - f"No charts found on the page {self.instance.snow_url}{url}" - ) + assert len(charts) > 0, f"No charts found on the page {self.instance.snow_url}{url}" chart_idx = self.random.randint(0, len(charts)) - chart_title = ( - charts[chart_idx][0] if not is_report else "" - ) # No title for reports + chart_title = charts[chart_idx][0] if not is_report else "" # No title for reports _, chart_data, _ = self._get_chart_by_title(page, chart_title) # Select a series randomly @@ -831,12 +757,10 @@ def _generate_random_config( # Check if the data is interesting labels = [point["label"] for point in chart_data] - assert len(labels) > 1, ( - f"Not enough data in the chart (only {len(labels)} label)" - ) - assert not any(label.isdigit() for label in labels), ( - "Some chart labels are digits, which would cause errors in validation. Skipping." - ) + assert len(labels) > 1, f"Not enough data in the chart (only {len(labels)} label)" + assert not any( + l.isdigit() for l in labels + ), "Some chart labels are digits, which would cause errors in validation. Skipping." # Sample a type of question question = self.random.choice(question_types) @@ -902,9 +826,7 @@ def setup_goal(self, page: playwright.sync_api.Page) -> Tuple[str | dict]: # Configure task # ... sample a configuration self.config = ( - self.fixed_config - if self.fixed_config - else self.random.choice(self.all_configs()) + self.fixed_config if self.fixed_config else self.random.choice(self.all_configs()) ) # ... set start URL based on config self.start_url = self.instance.snow_url + self.config["url"]