|
27 | 27 | type=str, |
28 | 28 | default=None, |
29 | 29 | help="The model variant (configuration) to benchmark. E.g. 7b, 13b, 70b.", |
| 30 | + required=True, |
30 | 31 | ) |
31 | 32 | parser.add_argument( |
32 | 33 | "--model_path", |
|
37 | 38 | "--model_source", |
38 | 39 | type=str, |
39 | 40 | help="Source of the checkpoint. E.g. 'meta', 'hf', None", |
| 41 | + required=False, |
40 | 42 | ) |
41 | 43 | parser.add_argument( |
42 | 44 | "--tokenizer", |
43 | 45 | type=str, |
44 | | - required=True, |
45 | 46 | help="Path to the tokenizer (e.g. ~/tokenizer.model)", |
| 47 | + required=True, |
46 | 48 | ) |
47 | 49 | parser.add_argument( |
48 | 50 | "--default_dtype", |
49 | 51 | type=str, |
50 | 52 | default=None, |
51 | 53 | choices=["bf16", "fp16", "fp32"], |
52 | 54 | help="If set to one of the choices, overrides the model checkpoint weight format by setting the default pytorch format", |
| 55 | + required=False, |
53 | 56 | ) |
54 | 57 | parser.add_argument( |
55 | 58 | "--batch_size", |
56 | 59 | type=int, |
57 | 60 | default=1, |
58 | 61 | help="size of input batch", |
| 62 | + required=False, |
59 | 63 | ) |
60 | 64 | parser.add_argument( |
61 | 65 | "--min_pad_length", |
62 | 66 | type=int, |
63 | 67 | help="Pad inputs to a minimum specified length. If any prompt is larger than the specified length, padding will be determined by the largest prompt", |
64 | 68 | default=0, |
| 69 | + required=False, |
65 | 70 | ) |
66 | 71 | parser.add_argument( |
67 | 72 | "--max_new_tokens", |
68 | 73 | type=int, |
69 | 74 | help="max number of generated tokens", |
70 | 75 | default=100, |
| 76 | + required=False, |
71 | 77 | ) |
72 | 78 | parser.add_argument( |
73 | 79 | "--sharegpt_path", |
74 | 80 | type=str, |
75 | 81 | help="path to sharegpt data json", |
| 82 | + required=False, |
76 | 83 | ) |
77 | 84 | parser.add_argument( |
78 | 85 | "--output_dir", |
79 | 86 | type=str, |
80 | 87 | help="output directory", |
| 88 | + required=True, |
81 | 89 | ) |
82 | 90 | parser.add_argument( |
83 | 91 | "--topk_per_token", |
84 | 92 | type=int, |
85 | 93 | help="top k values per token to generate loss on", |
86 | | - default=20 |
| 94 | + default=20, |
| 95 | + required=False, |
87 | 96 | ) |
88 | 97 | parser.add_argument( |
89 | 98 | "--num_test_tokens_per_sequence", |
90 | 99 | type=int, |
91 | 100 | help="number of tokens in test. For instance, if max_new_tokens=128 and num_test_tokens_per_sequence=256, this means we will generate data over 2 sample prompts. If not set, will be set to max_new_tokens", |
92 | | - default=None |
| 101 | + default=None, |
| 102 | + required=False, |
93 | 103 | ) |
94 | 104 | parser.add_argument( |
95 | 105 | "--extra_get_model_kwargs", |
96 | 106 | nargs='*', |
97 | 107 | default={}, |
98 | | - help="Use this to override model configuration values to get model. Example: --extra_get_model_kwargs nlayers=2,..." |
| 108 | + help="Use this to override model configuration values to get model. Example: --extra_get_model_kwargs nlayers=2,...", |
| 109 | + required=False, |
99 | 110 | ) |
100 | 111 | args = parser.parse_args() |
101 | 112 |
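
The --extra_get_model_kwargs flag above takes free-form key=value overrides (e.g. nlayers=2), but this hunk does not show how they are folded into the get_model call. Below is a minimal sketch of one way such pairs could be parsed into a kwargs dict; the helper name parse_extra_kwargs and the literal_eval fallback are assumptions, not code from this PR.

```python
import ast

def parse_extra_kwargs(pairs):
    """Hypothetical helper: turn ["nlayers=2", "hidden_dim=1024"] into a dict."""
    extra = {}
    for pair in pairs or []:
        key, _, value = pair.partition("=")
        try:
            # interpret numbers/bools/lists literally, e.g. "2" -> 2
            extra[key] = ast.literal_eval(value)
        except (ValueError, SyntaxError):
            # fall back to the raw string for anything non-literal
            extra[key] = value
    return extra

# e.g. parse_extra_kwargs(["nlayers=2"]) == {"nlayers": 2}
```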
|
|
129 | 140 |
|
130 | 141 | torch.set_grad_enabled(False) |
131 | 142 |
|
| 143 | +# As per FMS check https://github.com/foundation-model-stack/foundation-model-stack/blob/ec55d3f4d2a620346a1eb003699db0b0d47e2598/fms/models/__init__.py#L88 |
| 144 | +# we need to remove the variant if model args or a model_path are provided
| 145 | +if args.model_path and args.variant: |
| 146 | + print("Both variant and model path provided. Removing variant") |
| 147 | + args.variant = None |
| 148 | + |
132 | 149 | # prepare the cuda model |
133 | 150 | cuda_model = get_model( |
134 | 151 | architecture=args.architecture, |
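
The --default_dtype flag above is documented as overriding the checkpoint weight format by setting the PyTorch default. A minimal sketch of that mapping, applied before the model is built, is below; the dict, the helper name, and the exact call site are assumptions rather than the script's actual code.

```python
import torch

# Assumed mapping from the CLI choices to torch dtypes.
DTYPE_BY_NAME = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}

def apply_default_dtype(name):
    # Hypothetical helper; a no-op when --default_dtype was not given.
    if name is not None:
        # Per the flag's help text, changing the PyTorch default dtype is what
        # overrides the checkpoint weight format when the model is constructed.
        torch.set_default_dtype(DTYPE_BY_NAME[name])

# e.g. apply_default_dtype(args.default_dtype) before calling get_model(...)
```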
@@ -211,14 +228,14 @@ def __prepare_inputs(batch_size, seq_length, tokenizer, seed=0): |
211 | 228 | failed_responses = validate_level_0(cpu_static_tokens, cuda_static_tokens) |
212 | 229 |
|
213 | 230 | print("extracted cuda validation information level 0") |
214 | | -if len(failed_responses) != 0: |
| 231 | +if len(failed_responses) != 0: |
215 | 232 | print_failed_cases(failed_responses, cpu_static_tokens, cuda_static_tokens, tokenizer) |
216 | 233 |
|
217 | 234 | def write_csv(l, path, metric): |
218 | 235 | with open(path, 'w') as f: |
219 | 236 | f.write(f'{metric}\n') |
220 | 237 | for t in l: |
221 | | - f.write(f"{t[2].item()}\n") |
| 238 | + f.write(f"{t[2].item()}\n") |
222 | 239 | f.close() |
223 | 240 |
|
224 | 241 | num_test_tokens_per_sequence = args.num_test_tokens_per_sequence |
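
write_csv above only relies on each entry of l exposing a scalar tensor at index 2 (it writes t[2].item() per row under a single metric header). A small usage sketch follows; the tuple layout and the metric name are illustrative assumptions.

```python
import torch

# Illustrative data: only the third element of each tuple matters to write_csv.
scores = [
    ("prompt-0", 0, torch.tensor(0.12)),
    ("prompt-0", 1, torch.tensor(0.34)),
]
write_csv(scores, "cross_entropy.csv", "cross_entropy")
# produces a "cross_entropy" header line, then one value per row
```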
|