Description
I have a training script here:
if __name__ == "__main__":
    from maia2 import model, train, utils

    maia2_model = model.from_pretrained(type="rapid", device="cpu")
    cfg = utils.parse_args(cfg_file_path='./maia2_models/config.yaml')
    train.run(cfg)
and the output is here:
me@mac maia2 % python3 run.py
Downloading model for rapid games.
Downloading...
From (original): https://drive.google.com/uc?id=1gbC1-c7c0EQOPPAVpGWubezeEW8grVwc
From (redirected): https://drive.google.com/uc?id=1gbC1-c7c0EQOPPAVpGWubezeEW8grVwc&confirm=t&uuid=1ffaed63-6012-4d70-a8d0-c2657d4180a1
To: /Volumes/Lichess/maia2/maia2_models/rapid_model.pt
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 280M/280M [00:07<00:00, 38.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1GQTskYMVMubNwZH2Bi6AmevI15CS6gk0
To: /Volumes/Lichess/maia2/maia2_models/config.yaml
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 701/701 [00:00<00:00, 1.86MB/s]
Model for rapid games loaded to cpu.
Configurations:
data_root: /datadrive2/lichess_data
seed: 42
num_workers: 16
verbose: 1
max_epochs: 3
max_ply: 300
clock_threshold: 30
chunk_size: 20000
start_year: 2018
start_month: 5
end_year: 2023
end_month: 11
from_checkpoint: False
checkpoint_epoch: 0
checkpoint_year: 2018
checkpoint_month: 5
num_cpu_left: 16
queue_length: 2
lr: 0.0001
wd: 1e-05
batch_size: 8192
first_n_moves: 10
last_n_moves: 10
dim_cnn: 256
dim_vit: 1024
num_blocks_cnn: 5
num_blocks_vit: 2
input_channels: 18
vit_length: 8
elo_dim: 128
side_info: True
side_info_coefficient: 1.0
value: True
value_coefficient: 1.0
max_games_per_elo_range: 20
MAIA2Model(
(chess_cnn): ChessResNet(
(conv1): Conv2d(18, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(layers): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(2): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(3): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(4): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
)
(conv_last): Conv2d(256, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn_last): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(to_patch_embedding): Sequential(
(0): Linear(in_features=64, out_features=1024, bias=True)
(1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(transformer): Transformer(
(norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(layers): ModuleList()
(elo_layers): ModuleList(
(0-1): 2 x ModuleList(
(0): EloAwareAttention(
(norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(attend): Softmax(dim=-1)
(dropout): Dropout(p=0.1, inplace=False)
(to_qkv): Linear(in_features=1024, out_features=3072, bias=False)
(elo_query): Linear(in_features=256, out_features=1024, bias=False)
(to_out): Sequential(
(0): Linear(in_features=1024, out_features=1024, bias=True)
(1): Dropout(p=0.1, inplace=False)
)
)
(1): FeedForward(
(net): Sequential(
(0): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(1): Linear(in_features=1024, out_features=1024, bias=True)
(2): GELU(approximate='none')
(3): Dropout(p=0.1, inplace=False)
(4): Linear(in_features=1024, out_features=1024, bias=True)
(5): Dropout(p=0.1, inplace=False)
)
)
)
)
)
(fc_1): Linear(in_features=1024, out_features=1880, bias=True)
(fc_2): Linear(in_features=1024, out_features=2021, bias=True)
(fc_3): Linear(in_features=128, out_features=1, bias=True)
(fc_3_1): Linear(in_features=1024, out_features=128, bias=True)
(elo_embedding): Embedding(11, 128)
(dropout): Dropout(p=0.1, inplace=False)
(last_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
Traceback (most recent call last):
File "/Volumes/Lichess/maia2/run.py", line 5, in
train.run(cfg)
File "/Volumes/Lichess/maia2/maia2/train.py", line 33, in run
model = model.cuda()
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 916, in cuda
return self._apply(lambda t: t.cuda(device))
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 780, in _apply
module._apply(fn)
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 780, in _apply
module._apply(fn)
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 805, in _apply
param_applied = fn(param)
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 916, in
return self._apply(lambda t: t.cuda(device))
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/cuda/init.py", line 305, in _lazy_init
raise AssertionError("Torch not compiled with CUDA enabled")
AssertionError: Torch not compiled with CUDA enabled
Are there any alternatives to requiring CUDA? (I'm on an M4 Mac.)
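For context, the traceback shows train.py calling model.cuda() unconditionally at line 33. PyTorch on Apple Silicon does ship the MPS (Metal) backend, so a minimal sketch of what I'd hope train.py could do instead is below. This assumes the rest of train.run does not hard-require CUDA elsewhere (e.g., other .cuda() calls or CUDA-only ops):

import torch

# Pick the best available backend: CUDA on NVIDIA GPUs, MPS on Apple
# Silicon (M1-M4), CPU as the fallback. Sketch only; replaces the
# unconditional model.cuda() that raises here.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model = model.to(device)
# Any input tensors would need the same treatment, e.g. batch.to(device).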