diff --git a/aiu_fms_testing_utils/utils/__init__.py b/aiu_fms_testing_utils/utils/__init__.py
index 1bc76acf..c1aa443c 100644
--- a/aiu_fms_testing_utils/utils/__init__.py
+++ b/aiu_fms_testing_utils/utils/__init__.py
@@ -37,17 +37,17 @@ def __download_file(url, filename):
     try:
         response = requests.get(url, stream=True)
         response.raise_for_status()
-        
+
         with open(filename, 'wb') as file:
             for chunk in response.iter_content(chunk_size=8192):
                 file.write(chunk)
         print(f"Successfully downloaded {filename}")
-    
+
     except requests.exceptions.RequestException as e:
         print(f"An error occurred: {e}")
 
 def __sample_requests(
-    prompt_list: List[str], 
+    prompt_list: List[str],
     num_requests: int,
     tokenizer: BaseTokenizer,
     prompt_length_min: int = 32,
@@ -67,15 +67,15 @@ def __sample_requests(
         # Tokenize the prompts and completions.
         prompt = prompt_list[i]
         prompt_token_ids = ids_for_prompt(prompt, tokenizer)
-        
+
         prompt_len = len(prompt_token_ids)
         if prompt_len < prompt_length_min or prompt_len > prompt_length_max:
             # Prune too short or too long sequences.
             continue
         filtered_dataset.append((prompt, prompt_len))
-    
+
     return filtered_dataset
-    
+
 
 
 def sample_sharegpt_requests(
@@ -96,15 +96,15 @@ def sample_sharegpt_requests(
     # Filter out the conversations with less than 2 turns.
     dataset = [data for data in dataset if len(data["conversations"]) >= 2]
     dataset = [data["conversations"][0]["value"] for data in dataset]
-    
+
     return __sample_requests(dataset, num_requests, tokenizer, prompt_length_min, prompt_length_max, seed)
 
 def sample_squad_v2_qa_requests(
     dataset_path: str,
-    num_requests: int, 
-    tokenizer: BaseTokenizer, 
-    prompt_length_min: int = 32, 
-    prompt_length_max: int = 64, 
+    num_requests: int,
+    tokenizer: BaseTokenizer,
+    prompt_length_min: int = 32,
+    prompt_length_max: int = 64,
     seed: Optional[int] = None
 ) -> List[Tuple[str, int]]:
     from datasets import load_dataset
@@ -113,10 +113,10 @@ def sample_squad_v2_qa_requests(
         ds = load_dataset(dataset_path)['train']
     else:
         ds = load_dataset("rajpurkar/squad_v2", cache_dir=dataset_path)['train']
-    
-    
+
+
     ds = [f"{data['context']}\n{data['question']}" for data in ds]
 
     return __sample_requests(ds, num_requests, tokenizer, prompt_length_min, prompt_length_max, seed)
-    
+
 
diff --git a/scripts/generate_metrics.py b/scripts/generate_metrics.py
index 0ed30101..8694cb88 100644
--- a/scripts/generate_metrics.py
+++ b/scripts/generate_metrics.py
@@ -27,6 +27,7 @@
     type=str,
     default=None,
     help="The model variant (configuration) to benchmark. E.g. 7b, 13b, 70b.",
+    required=True,
 )
 parser.add_argument(
     "--model_path",
@@ -37,12 +38,13 @@
     "--model_source",
     type=str,
     help="Source of the checkpoint. E.g. 'meta', 'hf', None",
+    required=False,
 )
 parser.add_argument(
     "--tokenizer",
     type=str,
-    required=True,
     help="Path to the tokenizer (e.g. ~/tokenizer.model)",
+    required=True,
 )
 parser.add_argument(
     "--default_dtype",
@@ -50,52 +52,61 @@
     default=None,
     choices=["bf16", "fp16", "fp32"],
     help="If set to one of the choices, overrides the model checkpoint weight format by setting the default pytorch format",
+    required=False,
 )
 parser.add_argument(
     "--batch_size",
     type=int,
     default=1,
     help="size of input batch",
+    required=False,
 )
 parser.add_argument(
     "--min_pad_length",
     type=int,
     help="Pad inputs to a minimum specified length. If any prompt is larger than the specified length, padding will be determined by the largest prompt",
     default=0,
+    required=False,
 )
 parser.add_argument(
     "--max_new_tokens",
     type=int,
     help="max number of generated tokens",
     default=100,
+    required=False,
 )
 parser.add_argument(
     "--sharegpt_path",
     type=str,
-    help="path to sharegpt data json",
+    help="path to sharegpt data json. If it is not available, then use target path",
+    required=True,
 )
 parser.add_argument(
     "--output_dir",
     type=str,
     help="output directory",
+    required=True,
 )
 parser.add_argument(
     "--topk_per_token",
     type=int,
     help="top k values per token to generate loss on",
-    default=20
+    default=20,
+    required=False,
 )
 parser.add_argument(
     "--num_test_tokens_per_sequence",
     type=int,
     help="number of tokens in test. For instance, if max_new_tokens=128 and num_test_tokens_per_sequence=256, this means we will generate data over 2 sample prompts. If not set, will be set to max_new_tokens",
-    default=None
+    default=None,
+    required=False,
 )
 parser.add_argument(
     "--extra_get_model_kwargs",
     nargs='*',
     default={},
-    help="Use this to override model configuration values to get model. Example: --extra_get_model_kwargs nlayers=2,..."
+    help="Use this to override model configuration values to get model. Example: --extra_get_model_kwargs nlayers=2,...",
+    required=False,
 )
 
 args = parser.parse_args()
@@ -129,6 +140,12 @@
 
 torch.set_grad_enabled(False)
 
+# As per FMS check https://github.com/foundation-model-stack/foundation-model-stack/blob/ec55d3f4d2a620346a1eb003699db0b0d47e2598/fms/models/__init__.py#L88
+# we need to remove variant if model_arg or model_path is provided
+if args.model_path and args.variant:
+    print("Both variant and model path provided. Removing variant")
+    args.variant = None
+
 # prepare the cuda model
 cuda_model = get_model(
     architecture=args.architecture,
@@ -211,14 +228,14 @@ def __prepare_inputs(batch_size, seq_length, tokenizer, seed=0):
 failed_responses = validate_level_0(cpu_static_tokens, cuda_static_tokens)
 
 print("extracted cuda validation information level 0")
-if len(failed_responses) != 0: 
+if len(failed_responses) != 0:
     print_failed_cases(failed_responses, cpu_static_tokens, cuda_static_tokens, tokenizer)
 
 def write_csv(l, path, metric):
     with open(path, 'w') as f:
         f.write(f'{metric}\n')
         for t in l:
-            f.write(f"{t[2].item()}\n") 
+            f.write(f"{t[2].item()}\n")
         f.close()
 
 num_test_tokens_per_sequence = args.num_test_tokens_per_sequence
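
For illustration only, a minimal sketch of the variant-handling rule this patch adds to scripts/generate_metrics.py: when both --model_path and --variant are supplied, the variant is cleared before get_model() is called, so the checkpoint is loaded from the explicit path. The Namespace values below are hypothetical placeholders, not part of the patch.

# Hypothetical standalone sketch of the precedence rule added in the patch.
from argparse import Namespace

args = Namespace(model_path="/models/example-checkpoint", variant="7b")  # placeholder values

# Same check as in the patched script: drop the variant when an explicit
# model path is also given, so both are not passed to get_model().
if args.model_path and args.variant:
    print("Both variant and model path provided. Removing variant")
    args.variant = None

assert args.variant is None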