|
3 | 3 | from skip import Skip |
4 | 4 |
|
5 | 5 | device_configs = { |
| 6 | + # topk llm used |
| 7 | + # normal llm used |
6 | 8 | # temp for 910B |
7 | 9 | 'join': dict( |
8 | 10 | name=['stack'], |
|
481 | 483 | ), |
482 | 484 | ), |
483 | 485 |
|
484 | | - 'pow_tensor': dict( |
| 486 | + 'pow_tensor': dict( # llm used |
485 | 487 | name=['pow'], |
486 | 488 | tensor_para=dict( |
487 | 489 | args=[ |
|
493 | 495 | ), |
494 | 496 | ), |
495 | 497 |
|
496 | | - 'pow_tensor_only_0_1': dict( |
| 498 | + 'pow_tensor_only_0_1': dict( # llm used |
497 | 499 | name=['pow'], |
498 | 500 | tensor_para=dict( |
499 | 501 | args=[ |
|
505 | 507 | ), |
506 | 508 | ), |
507 | 509 |
|
508 | | - 'pow_diff_dtype': dict( |
| 510 | + 'pow_diff_dtype': dict( # llm used |
509 | 511 | name=['pow'], |
510 | 512 | tensor_para=dict( |
511 | 513 | args=[ |
|
517 | 519 | ), |
518 | 520 | ), |
519 | 521 |
|
520 | | - 'bmm': dict( |
| 522 | + 'bmm': dict( # llm used |
521 | 523 | name=['bmm'], |
522 | 524 | atol=3e-2, |
523 | 525 | rtol=3e-2, |
|
535 | 537 | ), |
536 | 538 | ), |
537 | 539 |
|
538 | | - 'reduce_op': dict( |
| 540 | + 'reduce_op': dict( # llm used |
539 | 541 | name=['sum'], |
540 | 542 | atol=1e-3, |
541 | 543 | rtol=1e-3, |
542 | 544 | ), |
543 | 545 |
|
544 | | - 'reduce_partial_op': dict( |
| 546 | + 'reduce_partial_op': dict( # llm used |
545 | 547 | atol=1e-3, |
546 | 548 | rtol=1e-3, |
547 | 549 | name=['sum'], |
|
571 | 573 | ), |
572 | 574 | ), |
573 | 575 |
|
574 | | - 'nll_loss': dict( |
| 576 | + 'nll_loss': dict( # llm used |
575 | 577 | name=['nll_loss'], |
576 | 578 | tensor_para=dict( |
577 | 579 | args=[ |
|
584 | 586 | ), |
585 | 587 | ), |
586 | 588 |
|
587 | | - 'nll_loss_empty_tensor': dict( |
| 589 | + 'nll_loss_empty_tensor': dict( # llm used |
588 | 590 | name=['nll_loss'], |
589 | 591 | tensor_para=dict( |
590 | 592 | args=[ |
|
675 | 677 | rtol = 1e-1, |
676 | 678 | ), |
677 | 679 |
|
678 | | - 'embedding': dict( |
| 680 | + 'embedding': dict( # llm used |
679 | 681 | name=["embedding"], |
680 | 682 | tensor_para=dict( |
681 | 683 | args=[ |
|
736 | 738 | ), |
737 | 739 | ), |
738 | 740 |
|
739 | | - 'split': dict( |
| 741 | + 'split': dict( # llm used |
740 | 742 | name=['split'], |
741 | 743 | tensor_para=dict( |
742 | 744 | args=[ |
|
1024 | 1026 | ), |
1025 | 1027 | ), |
1026 | 1028 |
|
1027 | | - 'mm': dict( |
| 1029 | + 'mm': dict( # llm used |
1028 | 1030 | name=['mm'], |
1029 | 1031 | atol=2e-2, |
1030 | 1032 | rtol=2e-2, |
1031 | 1033 | ), |
1032 | 1034 |
|
1033 | | - 'mm_diff_dtype': dict( |
| 1035 | + 'mm_diff_dtype': dict( # llm used |
1034 | 1036 | name=['mm'], |
1035 | 1037 | atol=2e-2, |
1036 | 1038 | rtol=2e-2, |
|
1156 | 1158 | ), |
1157 | 1159 | ), |
1158 | 1160 |
|
1159 | | - 'norm': dict( |
| 1161 | + 'norm': dict( # llm used |
1160 | 1162 | name=['norm'], |
1161 | 1163 | tensor_para=dict( |
1162 | 1164 | args=[ |
|
1232 | 1234 | ), |
1233 | 1235 | ), |
1234 | 1236 |
|
1235 | | - 'gather': dict( |
| 1237 | + 'gather': dict( # llm used |
1236 | 1238 | name=['gather'], |
1237 | 1239 | tensor_para=dict( |
1238 | 1240 | args=[ |
|
1244 | 1246 | ), |
1245 | 1247 | ), |
1246 | 1248 |
|
1247 | | - 'gather_0dim': dict( |
| 1249 | + 'gather_0dim': dict( # llm used |
1248 | 1250 | name=['gather'], |
1249 | 1251 | tensor_para=dict( |
1250 | 1252 | args=[ |
|
1256 | 1258 | ), |
1257 | 1259 | ), |
1258 | 1260 |
|
1259 | | - 'gather_not_float': dict( |
| 1261 | + 'gather_not_float': dict( # llm used |
1260 | 1262 | name=['gather'], |
1261 | 1263 | tensor_para=dict( |
1262 | 1264 | args=[ |
|
1268 | 1270 | ), |
1269 | 1271 | ), |
1270 | 1272 |
|
1271 | | - 'scatter': dict( |
| 1273 | + 'scatter': dict( # llm used |
1272 | 1274 | name=['scatter'], |
1273 | 1275 | tensor_para=dict( |
1274 | 1276 | args=[ |
|
1280 | 1282 | ), |
1281 | 1283 | ), |
1282 | 1284 |
|
1283 | | - 'scatter_scalar': dict( |
| 1285 | + 'scatter_scalar': dict( # llm used |
1284 | 1286 | name=['scatter'], |
1285 | 1287 | para=dict( |
1286 | 1288 | # In this case, for float32 (but not float64), no matter what the value parameter is, |
|
1291 | 1293 | ), |
1292 | 1294 | ), |
1293 | 1295 |
|
1294 | | - 'index_put_acc_three_indices': dict( |
| 1296 | + 'index_put_acc_three_indices': dict( # llm used |
1295 | 1297 | name=['index_put'], |
1296 | 1298 | tensor_para=dict( |
1297 | 1299 | args=[ |
|
1303 | 1305 | ), |
1304 | 1306 | ), |
1305 | 1307 |
|
1306 | | - 'index_put_acc_two_indices': dict( |
| 1308 | + 'index_put_acc_two_indices': dict( # llm used |
1307 | 1309 | name=['index_put'], |
1308 | 1310 | tensor_para=dict( |
1309 | 1311 | args=[ |
|
1315 | 1317 | ), |
1316 | 1318 | ), |
1317 | 1319 |
|
1318 | | - 'index_put_acc_one_indices': dict( |
| 1320 | + 'index_put_acc_one_indices': dict( # llm used |
1319 | 1321 | name=['index_put'], |
1320 | 1322 | tensor_para=dict( |
1321 | 1323 | args=[ |
|
1327 | 1329 | ), |
1328 | 1330 | ), |
1329 | 1331 |
|
1330 | | - 'index_put_acc_bool_indices_zeros': dict( |
| 1332 | + 'index_put_acc_bool_indices_zeros': dict( # llm used |
1331 | 1333 | name=['index_put'], |
1332 | 1334 | tensor_para=dict( |
1333 | 1335 | args=[ |
|
1339 | 1341 | ), |
1340 | 1342 | ), |
1341 | 1343 |
|
1342 | | - 'index_put_one_indices': dict( |
| 1344 | + 'index_put_one_indices': dict( # llm used |
1343 | 1345 | name=['index_put'], |
1344 | 1346 | tensor_para=dict( |
1345 | 1347 | args=[ |
|
1351 | 1353 | ), |
1352 | 1354 | ), |
1353 | 1355 |
|
1354 | | - 'index_put_bool_indices_value': dict( |
| 1356 | + 'index_put_bool_indices_value': dict( # llm used |
1355 | 1357 | name=['index_put'], |
1356 | 1358 | tensor_para=dict( |
1357 | 1359 | args=[ |
|
1436 | 1438 | ), |
1437 | 1439 | ), |
1438 | 1440 |
|
1439 | | - 'copy': dict( |
| 1441 | + 'copy': dict( # llm used |
1440 | 1442 | name=["copy_"], |
1441 | 1443 | tensor_para=dict( |
1442 | 1444 | # FIXME data type DT_COMPLEX128 of input [dst] is not supported |
|
1454 | 1456 | ) |
1455 | 1457 | ), |
1456 | 1458 |
|
1457 | | - 'copy_input_no_contiguous': dict( |
| 1459 | + 'copy_input_no_contiguous': dict( # llm used |
1458 | 1460 | name=["copy_"], |
1459 | 1461 | tensor_para=dict( |
1460 | 1462 | # FIXME not supported complex |
|
1472 | 1474 | ) |
1473 | 1475 | ), |
1474 | 1476 |
|
1475 | | - 'copy_other_no_contiguous': dict( |
| 1477 | + 'copy_other_no_contiguous': dict( # llm used |
1476 | 1478 | name=["copy_"], |
1477 | 1479 | tensor_para=dict( |
1478 | 1480 | # FIXME data type DT_COMPLEX64 of input [dst] is not supported |
|
1491 | 1493 | ) |
1492 | 1494 | ), |
1493 | 1495 |
|
1494 | | - 'copy_all_no_contiguous': dict( |
| 1496 | + 'copy_all_no_contiguous': dict( # llm used |
1495 | 1497 | name=["copy_"], |
1496 | 1498 | tensor_para=dict( |
1497 | 1499 | # FIXME data type DT_COMPLEX64 of input [dst] is not supported |
|
1509 | 1511 | ) |
1510 | 1512 | ), |
1511 | 1513 |
|
1512 | | - 'fill_not_float': dict( |
| 1514 | + 'fill_not_float': dict( # llm used |
1513 | 1515 | name=["fill_"], |
1514 | 1516 | tensor_para=dict( |
1515 | 1517 | args=[ |
|
1581 | 1583 | ), |
1582 | 1584 | ), |
1583 | 1585 |
|
1584 | | - 'repeat': dict( |
| 1586 | + 'repeat': dict( # llm used |
1585 | 1587 | name=['repeat'], |
1586 | 1588 | tensor_para=dict( |
1587 | 1589 | args=[ |
|
1641 | 1643 | ), |
1642 | 1644 | ), |
1643 | 1645 |
|
1644 | | - 'triu': dict( |
| 1646 | + 'triu': dict( # llm used |
1645 | 1647 | name=['triu'], |
1646 | 1648 | tensor_para=dict( |
1647 | 1649 | args=[ |
|
1677 | 1679 | ), |
1678 | 1680 | ), |
1679 | 1681 |
|
1680 | | - 'reduce_partial_op_4': dict( |
| 1682 | + 'reduce_partial_op_4': dict( # llm used |
1681 | 1683 | name=['sum'], |
1682 | 1684 | interface=['torch'], |
1683 | 1685 | atol=1e-4, |
|
0 commit comments