21 | 21 |
22 | 22 | from utils import *
23 | 23 |
   | 24 | +# install warmup_scheduler from https://github.com/ildoonet/pytorch-gradual-warmup-lr
24 | 25 | from warmup_scheduler import GradualWarmupScheduler
25 | 26 | import bnn.models.resnet as models
26 | 27 | from bnn.engine import BinaryChef
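
The hunk above only adds the import and a pointer to the pytorch-gradual-warmup-lr package; how the scheduler is actually wired up is not shown in this commit. Below is a minimal sketch of the usual pattern from that repository's README (multiplier, total_epoch, after_scheduler); the toy model, the cosine base schedule, and the concrete warmup length are illustrative assumptions, not values taken from this diff.

import torch
from warmup_scheduler import GradualWarmupScheduler

# Stand-in model, with AdamW settings matching the new defaults further below.
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-2)

# Base schedule that takes over once warmup is finished (illustrative choice).
cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

# Ramp the learning rate over the first 5 epochs, then defer to the cosine schedule.
scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0,
                                   total_epoch=5, after_scheduler=cosine)

for epoch in range(100):
    ...  # one training epoch elided
    scheduler.step()
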
48 | 49 |                     help='mini-batch size (default: 256), this is the total '
49 | 50 |                          'batch size of all GPUs on the current node when '
50 | 51 |                          'using Data Parallel or Distributed Data Parallel')
51 |    | -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
   | 52 | +parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
52 | 53 |                     metavar='LR', help='initial learning rate', dest='lr')
53 | 54 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
54 | 55 |                     help='momentum')
55 |    | -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
   | 56 | +parser.add_argument('--wd', '--weight-decay', default=1e-2, type=float,
56 | 57 |                     metavar='W', help='weight decay (default: 1e-4)',
57 | 58 |                     dest='weight_decay')
58 | 59 | parser.add_argument('-p', '--print-freq', default=100, type=int,
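
The defaults move from values typical for SGD training (0.1 and 1e-4) to ones more usual for AdamW (1e-3 and 1e-2). Both options keep two aliases that write to a single attribute through dest; the small self-contained illustration below shows that behaviour with the new defaults (the flag value passed to parse_args is hypothetical).

import argparse

parser = argparse.ArgumentParser()
# Same pattern as the hunk above: two option strings, an explicit dest,
# and the updated defaults.
parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--wd', '--weight-decay', default=1e-2, type=float,
                    metavar='W', help='weight decay', dest='weight_decay')

# Either alias sets the same attribute; anything not passed keeps its default.
args = parser.parse_args(['--learning-rate', '0.002'])
print(args.lr, args.weight_decay)  # 0.002 0.01
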
@@ -190,7 +191,7 @@ def main_worker(gpu, ngpus_per_node, args):
190 | 191 |
191 | 192 |     parameters = model.parameters()
192 | 193 |     if args.optimizer == 'adamw':
193 |     | -        wd = 0.01 if args.step == 0 else 0.01
    | 194 | +        wd = args.weight_decay if args.step == 0 else 0
194 | 195 |         optimizer = torch.optim.AdamW(parameters, args.lr, weight_decay=wd)
195 | 196 |     elif args.optimizer == 'adam':
196 | 197 |         optimizer = torch.optim.Adam(parameters, args.lr)
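
The removed line assigned 0.01 on both sides of the conditional, so the step check had no effect and args.weight_decay was ignored entirely. The replacement applies the configured decay only in the first training step and turns it off afterwards, presumably because L2 decay on the latent weights is no longer wanted once binarization is active. A minimal sketch of that step-dependent construction, pulled out into a helper (build_optimizer is a hypothetical name, not part of this script):

import torch

def build_optimizer(parameters, args, step):
    # Same logic as the hunk above: weight decay only in the first step,
    # disabled for later steps; plain Adam is left without decay in both cases.
    if args.optimizer == 'adamw':
        wd = args.weight_decay if step == 0 else 0
        return torch.optim.AdamW(parameters, args.lr, weight_decay=wd)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(parameters, args.lr)
    raise ValueError(f'unsupported optimizer: {args.optimizer}')

Since args.step comes from the command line, each step of the binarization recipe presumably corresponds to a separate invocation of the script, each building its own optimizer with the matching weight-decay setting.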