| @@ -0,0 +1,2 @@ | |||||
| models.ckpt | |||||
| training_state.bin | |||||
| @@ -0,0 +1,8 @@ | |||||
| # BNN.pytorch | |||||
| Binarized Neural Network (BNN) for PyTorch | |||||
| This is the PyTorch version of the BNN code, with VGG and ResNet models. | |||||
| Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||||
| The code is based on https://github.com/eladhoffer/convNet.pytorch | |||||
| Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||||
| To train ResNet-18 on the CIFAR-10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10 | |||||
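| To evaluate a saved checkpoint on the validation set, pass it with the --evaluate flag, for example: python main_binary.py --model resnet_binary --dataset cifar10 --evaluate results/resnet18_binary/model_best.pth.tar (the best checkpoint is written to the save folder as model_best.pth.tar) | |||||
| To train on more than one GPU, list the devices with --gpus, e.g. --gpus 0,1; the first device in the list is used as the default. | |||||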
| @@ -0,0 +1,37 @@ | |||||
| import os | |||||
| import torchvision.datasets as datasets | |||||
| import torchvision.transforms as transforms | |||||
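| # root folder that holds all datasets; adjust this path to your local setup | |||||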
| _DATASETS_MAIN_PATH = '/home/Datasets' | |||||
| _dataset_path = { | |||||
| 'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'), | |||||
| 'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'), | |||||
| 'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'), | |||||
| 'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'), | |||||
| 'imagenet': { | |||||
| 'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'), | |||||
| 'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val') | |||||
| } | |||||
| } | |||||
| def get_dataset(name, split='train', transform=None, | |||||
| target_transform=None, download=True): | |||||
| train = (split == 'train') | |||||
| if name == 'cifar10': | |||||
| return datasets.CIFAR10(root=_dataset_path['cifar10'], | |||||
| train=train, | |||||
| transform=transform, | |||||
| target_transform=target_transform, | |||||
| download=download) | |||||
| elif name == 'cifar100': | |||||
| return datasets.CIFAR100(root=_dataset_path['cifar100'], | |||||
| train=train, | |||||
| transform=transform, | |||||
| target_transform=target_transform, | |||||
| download=download) | |||||
| elif name == 'imagenet': | |||||
| path = _dataset_path[name][split] | |||||
| return datasets.ImageFolder(root=path, | |||||
| transform=transform, | |||||
| target_transform=target_transform) | |||||
| @@ -0,0 +1,309 @@ | |||||
| import argparse | |||||
| import os | |||||
| import time | |||||
| import logging | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.parallel | |||||
| import torch.backends.cudnn as cudnn | |||||
| import torch.optim | |||||
| import torch.utils.data | |||||
| import models | |||||
| from torch.autograd import Variable | |||||
| from data import get_dataset | |||||
| from preprocess import get_transform | |||||
| from utils import * | |||||
| from datetime import datetime | |||||
| from ast import literal_eval | |||||
| from torchvision.utils import save_image | |||||
| model_names = sorted(name for name in models.__dict__ | |||||
| if name.islower() and not name.startswith("__") | |||||
| and callable(models.__dict__[name])) | |||||
| parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
| parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results', | |||||
| help='results dir') | |||||
| parser.add_argument('--save', metavar='SAVE', default='', | |||||
| help='saved folder') | |||||
| parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
| help='dataset name or folder') | |||||
| parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
| choices=model_names, | |||||
| help='model architecture: ' + | |||||
| ' | '.join(model_names) + | |||||
| ' (default: alexnet)') | |||||
| parser.add_argument('--input_size', type=int, default=None, | |||||
| help='image input size') | |||||
| parser.add_argument('--model_config', default='', | |||||
| help='additional architecture configuration') | |||||
| parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
| help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
| parser.add_argument('--gpus', default='0', | |||||
| help='gpus used for training - e.g 0,1,3') | |||||
| parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
| help='number of data loading workers (default: 8)') | |||||
| parser.add_argument('--epochs', default=2500, type=int, metavar='N', | |||||
| help='number of total epochs to run') | |||||
| parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
| help='manual epoch number (useful on restarts)') | |||||
| parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
| metavar='N', help='mini-batch size (default: 256)') | |||||
| parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
| help='optimizer function used') | |||||
| parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
| metavar='LR', help='initial learning rate') | |||||
| parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
| help='momentum') | |||||
| parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
| metavar='W', help='weight decay (default: 1e-4)') | |||||
| parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
| metavar='N', help='print frequency (default: 10)') | |||||
| parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
| help='path to latest checkpoint (default: none)') | |||||
| parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
| help='evaluate model FILE on validation set') | |||||
| def main(): | |||||
| global args, best_prec1 | |||||
| best_prec1 = 0 | |||||
| args = parser.parse_args() | |||||
| if args.evaluate: | |||||
| args.results_dir = '/tmp' | |||||
| if args.save == '': | |||||
| args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
| save_path = os.path.join(args.results_dir, args.save) | |||||
| if not os.path.exists(save_path): | |||||
| os.makedirs(save_path) | |||||
| setup_logging(os.path.join(save_path, 'log.txt')) | |||||
| results_file = os.path.join(save_path, 'results.%s') | |||||
| results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
| logging.info("saving to %s", save_path) | |||||
| logging.debug("run arguments: %s", args) | |||||
| if 'cuda' in args.type: | |||||
| args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
| torch.cuda.set_device(args.gpus[0]) | |||||
| cudnn.benchmark = True | |||||
| else: | |||||
| args.gpus = None | |||||
| # create model | |||||
| logging.info("creating model %s", args.model) | |||||
| model = models.__dict__[args.model] | |||||
| model_config = {'input_size': args.input_size, 'dataset': args.dataset} | |||||
| if args.model_config != '': | |||||
| model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
| model = model(**model_config) | |||||
| logging.info("created model with configuration: %s", model_config) | |||||
| # optionally resume from a checkpoint | |||||
| if args.evaluate: | |||||
| if not os.path.isfile(args.evaluate): | |||||
| parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
| checkpoint = torch.load(args.evaluate) | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| args.evaluate, checkpoint['epoch']) | |||||
| elif args.resume: | |||||
| checkpoint_file = args.resume | |||||
| if os.path.isdir(checkpoint_file): | |||||
| results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
| checkpoint_file = os.path.join( | |||||
| checkpoint_file, 'model_best.pth.tar') | |||||
| if os.path.isfile(checkpoint_file): | |||||
| logging.info("loading checkpoint '%s'", args.resume) | |||||
| checkpoint = torch.load(checkpoint_file) | |||||
| args.start_epoch = checkpoint['epoch'] - 1 | |||||
| best_prec1 = checkpoint['best_prec1'] | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| checkpoint_file, checkpoint['epoch']) | |||||
| else: | |||||
| logging.error("no checkpoint found at '%s'", args.resume) | |||||
| num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
| logging.info("number of parameters: %d", num_parameters) | |||||
| # Data loading code | |||||
| default_transform = { | |||||
| 'train': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=True), | |||||
| 'eval': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=False) | |||||
| } | |||||
| transform = getattr(model, 'input_transform', default_transform) | |||||
| regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
| 'lr': args.lr, | |||||
| 'momentum': args.momentum, | |||||
| 'weight_decay': args.weight_decay}}) | |||||
| # define loss function (criterion) and optimizer | |||||
| criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)() | |||||
| criterion.type(args.type) | |||||
| model.type(args.type) | |||||
| val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
| val_loader = torch.utils.data.DataLoader( | |||||
| val_data, | |||||
| batch_size=args.batch_size, shuffle=False, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| if args.evaluate: | |||||
| validate(val_loader, model, criterion, 0) | |||||
| return | |||||
| train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
| train_loader = torch.utils.data.DataLoader( | |||||
| train_data, | |||||
| batch_size=args.batch_size, shuffle=True, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
| logging.info('training regime: %s', regime) | |||||
| for epoch in range(args.start_epoch, args.epochs): | |||||
| optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
| # train for one epoch | |||||
| train_loss, train_prec1, train_prec5 = train( | |||||
| train_loader, model, criterion, epoch, optimizer) | |||||
| # evaluate on validation set | |||||
| val_loss, val_prec1, val_prec5 = validate( | |||||
| val_loader, model, criterion, epoch) | |||||
| # remember best prec@1 and save checkpoint | |||||
| is_best = val_prec1 > best_prec1 | |||||
| best_prec1 = max(val_prec1, best_prec1) | |||||
| save_checkpoint({ | |||||
| 'epoch': epoch + 1, | |||||
| 'model': args.model, | |||||
| 'config': args.model_config, | |||||
| 'state_dict': model.state_dict(), | |||||
| 'best_prec1': best_prec1, | |||||
| 'regime': regime | |||||
| }, is_best, path=save_path) | |||||
| logging.info('\n Epoch: {0}\t' | |||||
| 'Training Loss {train_loss:.4f} \t' | |||||
| 'Training Prec@1 {train_prec1:.3f} \t' | |||||
| 'Training Prec@5 {train_prec5:.3f} \t' | |||||
| 'Validation Loss {val_loss:.4f} \t' | |||||
| 'Validation Prec@1 {val_prec1:.3f} \t' | |||||
| 'Validation Prec@5 {val_prec5:.3f} \n' | |||||
| .format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_prec1=train_prec1, val_prec1=val_prec1, | |||||
| train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
| results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
| train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
| #results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
| # title='Loss', ylabel='loss') | |||||
| #results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
| # title='Error@1', ylabel='error %') | |||||
| #results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
| # title='Error@5', ylabel='error %') | |||||
| results.save() | |||||
| def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
| if args.gpus and len(args.gpus) > 1: | |||||
| model = torch.nn.DataParallel(model, args.gpus) | |||||
| batch_time = AverageMeter() | |||||
| data_time = AverageMeter() | |||||
| losses = AverageMeter() | |||||
| top1 = AverageMeter() | |||||
| top5 = AverageMeter() | |||||
| end = time.time() | |||||
| for i, (inputs, target) in enumerate(data_loader): | |||||
| # measure data loading time | |||||
| data_time.update(time.time() - end) | |||||
| if args.gpus is not None: | |||||
| target = target.cuda() | |||||
| if not training: | |||||
| with torch.no_grad(): | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| else: | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| loss = criterion(output, target_var) | |||||
| if type(output) is list: | |||||
| output = output[0] | |||||
| # measure accuracy and record loss | |||||
| prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
| losses.update(loss.item(), inputs.size(0)) | |||||
| top1.update(prec1.item(), inputs.size(0)) | |||||
| top5.update(prec5.item(), inputs.size(0)) | |||||
| if training: | |||||
| # compute gradient and do SGD step | |||||
| optimizer.zero_grad() | |||||
| loss.backward() | |||||
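| # restore the full-precision weights (saved in .org by the Binarize* layers) so the optimizer step updates them rather than the binarized copies | |||||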
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
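| # clip the updated weights to [-1, 1] and store them back in .org for the next forward pass | |||||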
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| # measure elapsed time | |||||
| batch_time.update(time.time() - end) | |||||
| end = time.time() | |||||
| if i % args.print_freq == 0: | |||||
| logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
| 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
| 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
| 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
| 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
| 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
| epoch, i, len(data_loader), | |||||
| phase='TRAINING' if training else 'EVALUATING', | |||||
| batch_time=batch_time, | |||||
| data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
| return losses.avg, top1.avg, top5.avg | |||||
| def train(data_loader, model, criterion, epoch, optimizer): | |||||
| # switch to train mode | |||||
| model.train() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=True, optimizer=optimizer) | |||||
| def validate(data_loader, model, criterion, epoch): | |||||
| # switch to evaluate mode | |||||
| model.eval() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=False, optimizer=None) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @@ -0,0 +1,332 @@ | |||||
| import argparse | |||||
| import os | |||||
| import time | |||||
| import logging | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.parallel | |||||
| import torch.backends.cudnn as cudnn | |||||
| import torch.optim | |||||
| import torch.utils.data | |||||
| import models | |||||
| from torch.autograd import Variable | |||||
| from data import get_dataset | |||||
| from preprocess import get_transform | |||||
| from utils import * | |||||
| from datetime import datetime | |||||
| from ast import literal_eval | |||||
| from torchvision.utils import save_image | |||||
| from models.binarized_modules import HingeLoss | |||||
| model_names = sorted(name for name in models.__dict__ | |||||
| if name.islower() and not name.startswith("__") | |||||
| and callable(models.__dict__[name])) | |||||
| parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
| parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results', | |||||
| help='results dir') | |||||
| parser.add_argument('--save', metavar='SAVE', default='', | |||||
| help='saved folder') | |||||
| parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
| help='dataset name or folder') | |||||
| parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
| choices=model_names, | |||||
| help='model architecture: ' + | |||||
| ' | '.join(model_names) + | |||||
| ' (default: alexnet)') | |||||
| parser.add_argument('--input_size', type=int, default=None, | |||||
| help='image input size') | |||||
| parser.add_argument('--model_config', default='', | |||||
| help='additional architecture configuration') | |||||
| parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
| help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
| parser.add_argument('--gpus', default='0', | |||||
| help='gpus used for training - e.g 0,1,3') | |||||
| parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
| help='number of data loading workers (default: 8)') | |||||
| parser.add_argument('--epochs', default=900, type=int, metavar='N', | |||||
| help='number of total epochs to run') | |||||
| parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
| help='manual epoch number (useful on restarts)') | |||||
| parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
| metavar='N', help='mini-batch size (default: 256)') | |||||
| parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
| help='optimizer function used') | |||||
| parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
| metavar='LR', help='initial learning rate') | |||||
| parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
| help='momentum') | |||||
| parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
| metavar='W', help='weight decay (default: 1e-4)') | |||||
| parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
| metavar='N', help='print frequency (default: 10)') | |||||
| parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
| help='path to latest checkpoint (default: none)') | |||||
| parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
| help='evaluate model FILE on validation set') | |||||
| torch.cuda.random.manual_seed_all(10) | |||||
| output_dim = 0 | |||||
| def main(): | |||||
| global args, best_prec1, output_dim | |||||
| best_prec1 = 0 | |||||
| args = parser.parse_args() | |||||
| output_dim = {'cifar10': 10, 'cifar100':100, 'imagenet': 1000}[args.dataset] | |||||
| #import pdb; pdb.set_trace() | |||||
| #torch.save(args.batch_size/(len(args.gpus)/2+1),'multi_gpu_batch_size') | |||||
| if args.evaluate: | |||||
| args.results_dir = '/tmp' | |||||
| if args.save == '': | |||||
| args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
| save_path = os.path.join(args.results_dir, args.save) | |||||
| if not os.path.exists(save_path): | |||||
| os.makedirs(save_path) | |||||
| setup_logging(os.path.join(save_path, 'log.txt')) | |||||
| results_file = os.path.join(save_path, 'results.%s') | |||||
| results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
| logging.info("saving to %s", save_path) | |||||
| logging.debug("run arguments: %s", args) | |||||
| if 'cuda' in args.type: | |||||
| args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
| torch.cuda.set_device(args.gpus[0]) | |||||
| cudnn.benchmark = True | |||||
| else: | |||||
| args.gpus = None | |||||
| # create model | |||||
| logging.info("creating model %s", args.model) | |||||
| model = models.__dict__[args.model] | |||||
| model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim} | |||||
| if args.model_config != '': | |||||
| model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
| model = model(**model_config) | |||||
| logging.info("created model with configuration: %s", model_config) | |||||
| # optionally resume from a checkpoint | |||||
| if args.evaluate: | |||||
| if not os.path.isfile(args.evaluate): | |||||
| parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
| checkpoint = torch.load(args.evaluate) | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| args.evaluate, checkpoint['epoch']) | |||||
| elif args.resume: | |||||
| checkpoint_file = args.resume | |||||
| if os.path.isdir(checkpoint_file): | |||||
| results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
| checkpoint_file = os.path.join( | |||||
| checkpoint_file, 'model_best.pth.tar') | |||||
| if os.path.isfile(checkpoint_file): | |||||
| logging.info("loading checkpoint '%s'", args.resume) | |||||
| checkpoint = torch.load(checkpoint_file) | |||||
| args.start_epoch = checkpoint['epoch'] - 1 | |||||
| best_prec1 = checkpoint['best_prec1'] | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| checkpoint_file, checkpoint['epoch']) | |||||
| else: | |||||
| logging.error("no checkpoint found at '%s'", args.resume) | |||||
| num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
| logging.info("number of parameters: %d", num_parameters) | |||||
| # Data loading code | |||||
| default_transform = { | |||||
| 'train': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=True), | |||||
| 'eval': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=False) | |||||
| } | |||||
| transform = getattr(model, 'input_transform', default_transform) | |||||
| regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
| 'lr': args.lr, | |||||
| 'momentum': args.momentum, | |||||
| 'weight_decay': args.weight_decay}}) | |||||
| # define loss function (criterion) and optimizer | |||||
| #criterion = getattr(model, 'criterion', nn.NLLLoss)() | |||||
| criterion = getattr(model, 'criterion', HingeLoss)() | |||||
| #criterion.type(args.type) | |||||
| model.type(args.type) | |||||
| val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
| val_loader = torch.utils.data.DataLoader( | |||||
| val_data, | |||||
| batch_size=args.batch_size, shuffle=False, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| if args.evaluate: | |||||
| validate(val_loader, model, criterion, 0) | |||||
| return | |||||
| train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
| train_loader = torch.utils.data.DataLoader( | |||||
| train_data, | |||||
| batch_size=args.batch_size, shuffle=True, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
| logging.info('training regime: %s', regime) | |||||
| #import pdb; pdb.set_trace() | |||||
| #search_binarized_modules(model) | |||||
| for epoch in range(args.start_epoch, args.epochs): | |||||
| optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
| # train for one epoch | |||||
| train_loss, train_prec1, train_prec5 = train( | |||||
| train_loader, model, criterion, epoch, optimizer) | |||||
| # evaluate on validation set | |||||
| val_loss, val_prec1, val_prec5 = validate( | |||||
| val_loader, model, criterion, epoch) | |||||
| # remember best prec@1 and save checkpoint | |||||
| is_best = val_prec1 > best_prec1 | |||||
| best_prec1 = max(val_prec1, best_prec1) | |||||
| save_checkpoint({ | |||||
| 'epoch': epoch + 1, | |||||
| 'model': args.model, | |||||
| 'config': args.model_config, | |||||
| 'state_dict': model.state_dict(), | |||||
| 'best_prec1': best_prec1, | |||||
| 'regime': regime | |||||
| }, is_best, path=save_path) | |||||
| logging.info('\n Epoch: {0}\t' | |||||
| 'Training Loss {train_loss:.4f} \t' | |||||
| 'Training Prec@1 {train_prec1:.3f} \t' | |||||
| 'Training Prec@5 {train_prec5:.3f} \t' | |||||
| 'Validation Loss {val_loss:.4f} \t' | |||||
| 'Validation Prec@1 {val_prec1:.3f} \t' | |||||
| 'Validation Prec@5 {val_prec5:.3f} \n' | |||||
| .format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_prec1=train_prec1, val_prec1=val_prec1, | |||||
| train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
| results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
| train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
| results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
| title='Loss', ylabel='loss') | |||||
| results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
| title='Error@1', ylabel='error %') | |||||
| results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
| title='Error@5', ylabel='error %') | |||||
| results.save() | |||||
| def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
| if args.gpus and len(args.gpus) > 1: | |||||
| model = torch.nn.DataParallel(model, args.gpus) | |||||
| batch_time = AverageMeter() | |||||
| data_time = AverageMeter() | |||||
| losses = AverageMeter() | |||||
| top1 = AverageMeter() | |||||
| top5 = AverageMeter() | |||||
| end = time.time() | |||||
| for i, (inputs, target) in enumerate(data_loader): | |||||
| # measure data loading time | |||||
| data_time.update(time.time() - end) | |||||
| if args.gpus is not None: | |||||
| target = target.cuda() | |||||
| #import pdb; pdb.set_trace() | |||||
| if criterion.__class__.__name__=='HingeLoss': | |||||
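| # HingeLoss expects one-hot targets in {-1, +1}, so expand the class indices into that encoding | |||||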
| target=target.unsqueeze(1) | |||||
| target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim) | |||||
| target_onehot.fill_(-1) | |||||
| target_onehot.scatter_(1, target, 1) | |||||
| target=target.squeeze() | |||||
| if not training: | |||||
| with torch.no_grad(): | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target_onehot) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| else: | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target_onehot) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| #import pdb; pdb.set_trace() | |||||
| loss = criterion(output, target_onehot) | |||||
| #import pdb; pdb.set_trace() | |||||
| if type(output) is list: | |||||
| output = output[0] | |||||
| # measure accuracy and record loss | |||||
| prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
| losses.update(loss.item(), inputs.size(0)) | |||||
| top1.update(prec1.item(), inputs.size(0)) | |||||
| top5.update(prec5.item(), inputs.size(0)) | |||||
| #import pdb; pdb.set_trace() | |||||
| #if not training and top1.avg<15: | |||||
| # import pdb; pdb.set_trace() | |||||
| if training: | |||||
| # compute gradient and do SGD step | |||||
| optimizer.zero_grad() | |||||
| # add backward hook | |||||
| loss.backward() | |||||
| for p in list(model.parameters()): | |||||
| #import pdb; pdb.set_trace() | |||||
| if hasattr(p,'org'): | |||||
| #print('before:', p[0][0]) | |||||
| #gm=max(p.grad.data.max(),-p.grad.data.min()) | |||||
| #p.grad=p.grad.div(gm+1) | |||||
| p.data.copy_(p.org) | |||||
| #print('after:', p[0][0]) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| #import pdb; pdb.set_trace() | |||||
| if hasattr(p,'org'): | |||||
| #print('before:', p[0][0]) | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| #if epoch>30: | |||||
| # import pdb; pdb.set_trace() | |||||
| # measure elapsed time | |||||
| batch_time.update(time.time() - end) | |||||
| end = time.time() | |||||
| if i % args.print_freq == 0: | |||||
| logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
| 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
| 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
| 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
| 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
| 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
| epoch, i, len(data_loader), | |||||
| phase='TRAINING' if training else 'EVALUATING', | |||||
| batch_time=batch_time, | |||||
| data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
| return losses.avg, top1.avg, top5.avg | |||||
| def train(data_loader, model, criterion, epoch, optimizer): | |||||
| # switch to train mode | |||||
| model.train() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=True, optimizer=optimizer) | |||||
| def validate(data_loader, model, criterion, epoch): | |||||
| # switch to evaluate mode | |||||
| model.eval() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=False, optimizer=None) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @@ -0,0 +1,150 @@ | |||||
| from __future__ import print_function | |||||
| import argparse | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.functional as F | |||||
| import torch.optim as optim | |||||
| from torchvision import datasets, transforms | |||||
| from torch.autograd import Variable | |||||
| from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| from models.binarized_modules import Binarize,HingeLoss | |||||
| # Training settings | |||||
| parser = argparse.ArgumentParser(description='PyTorch MNIST Example') | |||||
| parser.add_argument('--batch-size', type=int, default=64, metavar='N', | |||||
| help='input batch size for training (default: 64)') | |||||
| parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', | |||||
| help='input batch size for testing (default: 1000)') | |||||
| parser.add_argument('--epochs', type=int, default=100, metavar='N', | |||||
| help='number of epochs to train (default: 100)') | |||||
| parser.add_argument('--lr', type=float, default=0.01, metavar='LR', | |||||
| help='learning rate (default: 0.01)') | |||||
| parser.add_argument('--momentum', type=float, default=0.5, metavar='M', | |||||
| help='SGD momentum (default: 0.5)') | |||||
| parser.add_argument('--no-cuda', action='store_true', default=False, | |||||
| help='disables CUDA training') | |||||
| parser.add_argument('--seed', type=int, default=1, metavar='S', | |||||
| help='random seed (default: 1)') | |||||
| parser.add_argument('--gpus', default=3, | |||||
| help='gpus used for training - e.g 0,1,3') | |||||
| parser.add_argument('--log-interval', type=int, default=10, metavar='N', | |||||
| help='how many batches to wait before logging training status') | |||||
| args = parser.parse_args() | |||||
| args.cuda = not args.no_cuda and torch.cuda.is_available() | |||||
| torch.manual_seed(args.seed) | |||||
| if args.cuda: | |||||
| torch.cuda.manual_seed(args.seed) | |||||
| kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} | |||||
| train_loader = torch.utils.data.DataLoader( | |||||
| datasets.MNIST('../data', train=True, download=True, | |||||
| transform=transforms.Compose([ | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize((0.1307,), (0.3081,)) | |||||
| ])), | |||||
| batch_size=args.batch_size, shuffle=True, **kwargs) | |||||
| test_loader = torch.utils.data.DataLoader( | |||||
| datasets.MNIST('../data', train=False, transform=transforms.Compose([ | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize((0.1307,), (0.3081,)) | |||||
| ])), | |||||
| batch_size=args.test_batch_size, shuffle=True, **kwargs) | |||||
| class Net(nn.Module): | |||||
| def __init__(self): | |||||
| super(Net, self).__init__() | |||||
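| # width multiplier for the hidden layers of the binarized MLP | |||||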
| self.infl_ratio=3 | |||||
| self.fc1 = BinarizeLinear(784, 2048*self.infl_ratio) | |||||
| self.htanh1 = nn.Hardtanh() | |||||
| self.bn1 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
| self.fc2 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
| self.htanh2 = nn.Hardtanh() | |||||
| self.bn2 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
| self.fc3 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
| self.htanh3 = nn.Hardtanh() | |||||
| self.bn3 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
| self.fc4 = nn.Linear(2048*self.infl_ratio, 10) | |||||
| self.logsoftmax = nn.LogSoftmax(dim=1) | |||||
| self.drop=nn.Dropout(0.5) | |||||
| def forward(self, x): | |||||
| x = x.view(-1, 28*28) | |||||
| x = self.fc1(x) | |||||
| x = self.bn1(x) | |||||
| x = self.htanh1(x) | |||||
| x = self.fc2(x) | |||||
| x = self.bn2(x) | |||||
| x = self.htanh2(x) | |||||
| x = self.fc3(x) | |||||
| x = self.drop(x) | |||||
| x = self.bn3(x) | |||||
| x = self.htanh3(x) | |||||
| x = self.fc4(x) | |||||
| return self.logsoftmax(x) | |||||
| model = Net() | |||||
| if args.cuda: | |||||
| torch.cuda.set_device(3) | |||||
| model.cuda() | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| optimizer = optim.Adam(model.parameters(), lr=args.lr) | |||||
| def train(epoch): | |||||
| model.train() | |||||
| for batch_idx, (data, target) in enumerate(train_loader): | |||||
| if args.cuda: | |||||
| data, target = data.cuda(), target.cuda() | |||||
| data, target = Variable(data), Variable(target) | |||||
| optimizer.zero_grad() | |||||
| output = model(data) | |||||
| loss = criterion(output, target) | |||||
| # learning-rate decay is applied once every 40 epochs in the epoch loop at the bottom of this script | |||||
| loss.backward() | |||||
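| # BNN weight swap: restore the real-valued weights from .org, let the optimizer update them, then clip and store them back below | |||||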
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| if batch_idx % args.log_interval == 0: | |||||
| print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( | |||||
| epoch, batch_idx * len(data), len(train_loader.dataset), | |||||
| 100. * batch_idx / len(train_loader), loss.item())) | |||||
| def test(): | |||||
| model.eval() | |||||
| test_loss = 0 | |||||
| correct = 0 | |||||
| with torch.no_grad(): | |||||
| for data, target in test_loader: | |||||
| if args.cuda: | |||||
| data, target = data.cuda(), target.cuda() | |||||
| data, target = Variable(data), Variable(target) | |||||
| output = model(data) | |||||
| test_loss += criterion(output, target).item() * data.size(0) # criterion returns the batch mean, so scale to a per-batch sum | |||||
| pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability | |||||
| correct += pred.eq(target.data.view_as(pred)).cpu().sum() | |||||
| test_loss /= len(test_loader.dataset) | |||||
| print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( | |||||
| test_loss, correct, len(test_loader.dataset), | |||||
| 100. * correct / len(test_loader.dataset))) | |||||
| for epoch in range(1, args.epochs + 1): | |||||
| train(epoch) | |||||
| test() | |||||
| if epoch%40==0: | |||||
| optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1 | |||||
| @@ -0,0 +1,6 @@ | |||||
| from .alexnet import * | |||||
| from .alexnet_binary import * | |||||
| from .resnet import * | |||||
| from .resnet_binary import * | |||||
| from .vgg_cifar10_binary import * | |||||
| @@ -0,0 +1,78 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| __all__ = ['alexnet'] | |||||
| class AlexNetOWT_BN(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(AlexNetOWT_BN, self).__init__() | |||||
| self.features = nn.Sequential( | |||||
| nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, | |||||
| bias=False), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(64), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(192), | |||||
| nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(384), | |||||
| nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256) | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| nn.Linear(256 * 6 * 6, 4096, bias=False), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(4096, 4096, bias=False), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(4096, num_classes) | |||||
| ) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
| 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
| 10: {'lr': 5e-3}, | |||||
| 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 20: {'lr': 5e-4}, | |||||
| 25: {'lr': 1e-4} | |||||
| } | |||||
| normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
| std=[0.229, 0.224, 0.225]) | |||||
| self.input_transform = { | |||||
| 'train': transforms.Compose([ | |||||
| transforms.Resize(256), | |||||
| transforms.RandomCrop(224), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]), | |||||
| 'eval': transforms.Compose([ | |||||
| transforms.Resize(256), | |||||
| transforms.CenterCrop(224), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]) | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 256 * 6 * 6) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def alexnet(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 1000) | |||||
| return AlexNetOWT_BN(num_classes) | |||||
| @@ -0,0 +1,92 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| __all__ = ['alexnet_binary'] | |||||
| class AlexNetOWT_BN(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(AlexNetOWT_BN, self).__init__() | |||||
| self.ratioInfl=3 | |||||
| self.features = nn.Sequential( | |||||
| BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(int(64*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(int(192*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1), | |||||
| nn.BatchNorm2d(int(384*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1), | |||||
| nn.BatchNorm2d(int(256*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Hardtanh(inplace=True) | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| BinarizeLinear(256 * 6 * 6, 4096), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(4096, 4096), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(4096, num_classes), | |||||
| nn.BatchNorm1d(num_classes), | |||||
| nn.LogSoftmax(dim=1) | |||||
| ) | |||||
| #self.regime = { | |||||
| # 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
| # 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
| # 10: {'lr': 5e-3}, | |||||
| # 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| # 20: {'lr': 5e-4}, | |||||
| # 25: {'lr': 1e-4} | |||||
| #} | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
| 20: {'lr': 1e-3}, | |||||
| 30: {'lr': 5e-4}, | |||||
| 35: {'lr': 1e-4}, | |||||
| 40: {'lr': 1e-5} | |||||
| } | |||||
| normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
| std=[0.229, 0.224, 0.225]) | |||||
| self.input_transform = { | |||||
| 'train': transforms.Compose([ | |||||
| transforms.Resize(256), | |||||
| transforms.RandomCrop(224), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]), | |||||
| 'eval': transforms.Compose([ | |||||
| transforms.Resize(256), | |||||
| transforms.CenterCrop(224), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]) | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 256 * 6 * 6) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def alexnet_binary(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 1000) | |||||
| return AlexNetOWT_BN(num_classes) | |||||
| @@ -0,0 +1,109 @@ | |||||
| import torch | |||||
| import pdb | |||||
| import torch.nn as nn | |||||
| import math | |||||
| from torch.autograd import Variable | |||||
| from torch.autograd import Function | |||||
| import numpy as np | |||||
| def Binarize(tensor,quant_mode='det'): | |||||
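| # 'det' binarizes deterministically with sign(); any other mode binarizes stochastically by rounding (x+1)/2 plus uniform noise and mapping back to {-1, +1} | |||||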
| if quant_mode=='det': | |||||
| return tensor.sign() | |||||
| else: | |||||
| return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) | |||||
| class HingeLoss(nn.Module): | |||||
| def __init__(self): | |||||
| super(HingeLoss,self).__init__() | |||||
| self.margin=1.0 | |||||
| def hinge_loss(self,input,target): | |||||
| #import pdb; pdb.set_trace() | |||||
| output=self.margin-input.mul(target) | |||||
| output[output.le(0)]=0 | |||||
| return output.mean() | |||||
| def forward(self, input, target): | |||||
| return self.hinge_loss(input,target) | |||||
| class SqrtHingeLossFunction(Function): | |||||
| def __init__(self): | |||||
| super(SqrtHingeLossFunction,self).__init__() | |||||
| self.margin=1.0 | |||||
| def forward(self, input, target): | |||||
| output=self.margin-input.mul(target) | |||||
| output[output.le(0)]=0 | |||||
| self.save_for_backward(input, target) | |||||
| loss=output.mul(output).sum(0).sum(1).div(target.numel()) | |||||
| return loss | |||||
| def backward(self,grad_output): | |||||
| input, target = self.saved_tensors | |||||
| output=self.margin-input.mul(target) | |||||
| output[output.le(0)]=0 | |||||
| grad_output.resize_as_(input).copy_(target).mul_(-2).mul_(output) | |||||
| grad_output.mul_(output.ne(0).float()) | |||||
| grad_output.div_(input.numel()) | |||||
| return grad_output,grad_output | |||||
| def Quantize(tensor,quant_mode='det', params=None, numBits=8): | |||||
| tensor.clamp_(-2**(numBits-1),2**(numBits-1)) | |||||
| if quant_mode=='det': | |||||
| tensor=tensor.mul(2**(numBits-1)).round().div(2**(numBits-1)) | |||||
| else: | |||||
| tensor=tensor.mul(2**(numBits-1)).round().add(torch.rand(tensor.size()).add(-0.5)).div(2**(numBits-1)) | |||||
| quant_fixed(tensor, params) | |||||
| return tensor | |||||
| #import torch.nn._functions as tnnf | |||||
| class BinarizeLinear(nn.Linear): | |||||
| def __init__(self, *kargs, **kwargs): | |||||
| super(BinarizeLinear, self).__init__(*kargs, **kwargs) | |||||
| def forward(self, input): | |||||
| # if input.size(1) != 784: | |||||
| # input.data=Binarize(input.data) | |||||
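| # keep a full-precision copy of the weights in .org and use their binarized version for this forward pass | |||||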
| if not hasattr(self.weight,'org'): | |||||
| self.weight.org=self.weight.data.clone() | |||||
| self.weight.data=Binarize(self.weight.org) | |||||
| out = nn.functional.linear(input, self.weight) | |||||
| if not self.bias is None: | |||||
| self.bias.org=self.bias.data.clone() | |||||
| out += self.bias.view(1, -1).expand_as(out) | |||||
| return out | |||||
| class BinarizeConv2d(nn.Conv2d): | |||||
| def __init__(self, *kargs, **kwargs): | |||||
| super(BinarizeConv2d, self).__init__(*kargs, **kwargs) | |||||
| def forward(self, input): | |||||
| # if input.size(1) != 3: | |||||
| # input.data = Binarize(input.data) | |||||
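| # same scheme as BinarizeLinear: full-precision weights live in .org, the convolution uses their binarized copy | |||||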
| if not hasattr(self.weight,'org'): | |||||
| self.weight.org=self.weight.data.clone() | |||||
| self.weight.data=Binarize(self.weight.org) | |||||
| out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
| self.padding, self.dilation, self.groups) | |||||
| if not self.bias is None: | |||||
| self.bias.org=self.bias.data.clone() | |||||
| out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||||
| return out | |||||
| # x = torch.tensor([[255.0, 200.0, 201.0], [210.0, 222.0, 223.0]]) | |||||
| # print(Quantize(x,quant_mode='det', params=None, numBits=8)) | |||||
| @@ -0,0 +1,217 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| import math | |||||
| __all__ = ['resnet'] | |||||
| def conv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def init_model(model): | |||||
| for m in model.modules(): | |||||
| if isinstance(m, nn.Conv2d): | |||||
| n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
| m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
| elif isinstance(m, nn.BatchNorm2d): | |||||
| m.weight.data.fill_(1) | |||||
| m.bias.data.zero_() | |||||
| class BasicBlock(nn.Module): | |||||
| expansion = 1 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
| super(BasicBlock, self).__init__() | |||||
| self.conv1 = conv3x3(inplanes, planes, stride) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.conv2 = conv3x3(planes, planes) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.downsample = downsample | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.relu(out) | |||||
| out = self.conv2(out) | |||||
| out = self.bn2(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| out = self.relu(out) | |||||
| return out | |||||
| class Bottleneck(nn.Module): | |||||
| expansion = 4 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
| super(Bottleneck, self).__init__() | |||||
| self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
| self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.downsample = downsample | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.relu(out) | |||||
| out = self.conv2(out) | |||||
| out = self.bn2(out) | |||||
| out = self.relu(out) | |||||
| out = self.conv3(out) | |||||
| out = self.bn3(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| out = self.relu(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self): | |||||
| super(ResNet, self).__init__() | |||||
| def _make_layer(self, block, planes, blocks, stride=1): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| downsample = nn.Sequential( | |||||
| nn.Conv2d(self.inplanes, planes * block.expansion, | |||||
| kernel_size=1, stride=stride, bias=False), | |||||
| nn.BatchNorm2d(planes * block.expansion), | |||||
| ) | |||||
| layers = [] | |||||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks): | |||||
| layers.append(block(self.inplanes, planes)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = self.conv1(x) | |||||
| x = self.bn1(x) | |||||
| x = self.relu(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.fc(x) | |||||
| return x | |||||
| class ResNet_imagenet(ResNet): | |||||
| def __init__(self, num_classes=1000, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
| super(ResNet_imagenet, self).__init__() | |||||
| self.inplanes = 64 | |||||
| self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
| bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(64) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
| self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
| self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
| self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
| self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
| self.avgpool = nn.AvgPool2d(7) | |||||
| self.fc = nn.Linear(512 * block.expansion, num_classes) | |||||
| init_model(self) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| 30: {'lr': 1e-2}, | |||||
| 60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 90: {'lr': 1e-4} | |||||
| } | |||||
| class ResNet_cifar10(ResNet): | |||||
| def __init__(self, num_classes=10, | |||||
| block=BasicBlock, depth=18): | |||||
| super(ResNet_cifar10, self).__init__() | |||||
| self.inplanes = 16 | |||||
| n = int((depth - 2) / 6) | |||||
| self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, | |||||
| bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(16) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.maxpool = lambda x: x | |||||
| self.layer1 = self._make_layer(block, 16, n) | |||||
| self.layer2 = self._make_layer(block, 32, n, stride=2) | |||||
| self.layer3 = self._make_layer(block, 64, n, stride=2) | |||||
| self.layer4 = lambda x: x | |||||
| self.avgpool = nn.AvgPool2d(8) | |||||
| self.fc = nn.Linear(64, num_classes) | |||||
| init_model(self) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| 81: {'lr': 1e-2}, | |||||
| 122: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 164: {'lr': 1e-4} | |||||
| } | |||||
| def resnet(**kwargs): | |||||
| num_classes, depth, dataset = map( | |||||
| kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
| if dataset == 'imagenet': | |||||
| num_classes = num_classes or 1000 | |||||
| depth = depth or 50 | |||||
| if depth == 18: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
| if depth == 34: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
| if depth == 50: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
| if depth == 101: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
| if depth == 152: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
| elif dataset == 'cifar10': | |||||
| num_classes = num_classes or 10 | |||||
| depth = depth or 18 #56 | |||||
| return ResNet_cifar10(num_classes=num_classes, | |||||
| block=BasicBlock, depth=depth) | |||||
| @@ -0,0 +1,248 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| import math | |||||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| __all__ = ['resnet_binary'] | |||||
| def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def conv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def init_model(model): | |||||
| for m in model.modules(): | |||||
| if isinstance(m, BinarizeConv2d): | |||||
| n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
| m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
| elif isinstance(m, nn.BatchNorm2d): | |||||
| m.weight.data.fill_(1) | |||||
| m.bias.data.zero_() | |||||
| class BasicBlock(nn.Module): | |||||
| expansion = 1 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
| super(BasicBlock, self).__init__() | |||||
| self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.conv2 = Binaryconv3x3(planes, planes) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.downsample = downsample | |||||
| self.do_bntan=do_bntan; | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x.clone() | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.tanh1(out) | |||||
| out = self.conv2(out) | |||||
| if self.downsample is not None: | |||||
| if residual.data.max()>1: | |||||
| import pdb; pdb.set_trace() | |||||
| residual = self.downsample(residual) | |||||
| out += residual | |||||
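| # blocks built with do_bntan=False skip this normalization; the model applies BatchNorm/Hardtanh after pooling instead | |||||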
| if self.do_bntan: | |||||
| out = self.bn2(out) | |||||
| out = self.tanh2(out) | |||||
| return out | |||||
| class Bottleneck(nn.Module): | |||||
| expansion = 4 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
| super(Bottleneck, self).__init__() | |||||
| self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
| self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
| self.tanh = nn.Hardtanh(inplace=True) | |||||
| self.downsample = downsample | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.tanh(out) | |||||
| out = self.conv2(out) | |||||
| out = self.bn2(out) | |||||
| out = self.tanh(out) | |||||
| out = self.conv3(out) | |||||
| out = self.bn3(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| out = self.tanh(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self): | |||||
| super(ResNet, self).__init__() | |||||
| def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| downsample = nn.Sequential( | |||||
| BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
| kernel_size=1, stride=stride, bias=False), | |||||
| nn.BatchNorm2d(planes * block.expansion), | |||||
| ) | |||||
| layers = [] | |||||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks-1): | |||||
| layers.append(block(self.inplanes, planes)) | |||||
| layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = self.conv1(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.bn1(x) | |||||
| x = self.tanh1(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.bn2(x) | |||||
| x = self.tanh2(x) | |||||
| x = self.fc(x) | |||||
| x = self.bn3(x) | |||||
| x = self.logsoftmax(x) | |||||
| return x | |||||
| class ResNet_imagenet(ResNet): | |||||
| def __init__(self, num_classes=1000, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
| super(ResNet_imagenet, self).__init__() | |||||
| self.inplanes = 64 | |||||
| self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
| bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(64) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
| self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
| self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
| self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
| self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
| self.avgpool = nn.AvgPool2d(7) | |||||
| self.bn2 = nn.BatchNorm1d(512 * block.expansion) | |||||
| self.bn3 = nn.BatchNorm1d(num_classes) | |||||
| self.logsoftmax = nn.LogSoftmax(dim=1) | |||||
| self.fc = BinarizeLinear(512 * block.expansion, num_classes) | |||||
| init_model(self) | |||||
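| # Epoch-indexed training regime consumed by adjust_optimizer() in utils.py; | |||||
| # settings are sticky, i.e. each later entry updates the previous one. | |||||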
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| 30: {'lr': 1e-2}, | |||||
| 60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 90: {'lr': 1e-4} | |||||
| } | |||||
| class ResNet_cifar10(ResNet): | |||||
| def __init__(self, num_classes=10, | |||||
| block=BasicBlock, depth=18): | |||||
| super(ResNet_cifar10, self).__init__() | |||||
| self.inflate = 5 | |||||
| self.inplanes = 16*self.inflate | |||||
| n = int((depth - 2) / 6) | |||||
| self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
| bias=False) | |||||
| self.maxpool = lambda x: x | |||||
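| # Identity stand-ins (here and for layer4 below) so the shared ResNet.forward | |||||
| # also works for this 3-stage CIFAR-10 variant. | |||||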
| self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
| self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
| self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
| self.layer4 = lambda x: x | |||||
| self.avgpool = nn.AvgPool2d(8) | |||||
| self.bn2 = nn.BatchNorm1d(64*self.inflate) | |||||
| self.bn3 = nn.BatchNorm1d(num_classes) | |||||
| self.logsoftmax = nn.LogSoftmax(dim=1) | |||||
| self.fc = BinarizeLinear(64*self.inflate, num_classes) | |||||
| init_model(self) | |||||
| #self.regime = { | |||||
| # 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| # 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| # 81: {'lr': 1e-4}, | |||||
| # 122: {'lr': 1e-5, 'weight_decay': 0}, | |||||
| # 164: {'lr': 1e-6} | |||||
| #} | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
| 101: {'lr': 1e-3}, | |||||
| 142: {'lr': 5e-4}, | |||||
| 184: {'lr': 1e-4}, | |||||
| 220: {'lr': 1e-5} | |||||
| } | |||||
| def resnet_binary(**kwargs): | |||||
| num_classes, depth, dataset = map( | |||||
| kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
| if dataset == 'imagenet': | |||||
| num_classes = num_classes or 1000 | |||||
| depth = depth or 50 | |||||
| if depth == 18: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
| if depth == 34: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
| if depth == 50: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
| if depth == 101: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
| if depth == 152: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
| elif dataset == 'cifar10': | |||||
| num_classes = num_classes or 10 | |||||
| depth = depth or 18 | |||||
| return ResNet_cifar10(num_classes=num_classes, | |||||
| block=BasicBlock, depth=depth) | |||||
| @ -0,0 +1,69 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| class AlexNetOWT_BN(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(AlexNetOWT_BN, self).__init__() | |||||
| self.features = nn.Sequential( | |||||
| nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, | |||||
| bias=False), | |||||
| nn.BatchNorm2d(128), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(128), | |||||
| nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(512), | |||||
| nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(512), | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| nn.Linear(512 * 4 * 4, 1024, bias=False), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(1024, 1024, bias=False), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(1024, num_classes), | |||||
| nn.LogSoftmax(dim=1) | |||||
| ) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
| 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
| 10: {'lr': 5e-3}, | |||||
| 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 20: {'lr': 5e-4}, | |||||
| 25: {'lr': 1e-4} | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 512 * 4 * 4) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def model(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 1000) | |||||
| return AlexNetOWT_BN(num_classes) | |||||
| @ -0,0 +1,80 @@ | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torch.autograd import Function | |||||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| class VGG_Cifar10(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(VGG_Cifar10, self).__init__() | |||||
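| # infl_ratio makes every binarized layer 3x wider, a common way to recover | |||||
| # accuracy lost to 1-bit weights and activations. | |||||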
| self.infl_ratio = 3 | |||||
| self.features = nn.Sequential( | |||||
| BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1, | |||||
| bias=True), | |||||
| nn.BatchNorm2d(128*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.BatchNorm2d(128*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.BatchNorm2d(256*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.BatchNorm2d(256*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.BatchNorm2d(512*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.BatchNorm2d(512), | |||||
| nn.Hardtanh(inplace=True) | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| BinarizeLinear(512 * 4 * 4, 1024, bias=True), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(1024, 1024, bias=True), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(1024, num_classes, bias=True), | |||||
| nn.BatchNorm1d(num_classes, affine=False), | |||||
| nn.LogSoftmax(dim=1) | |||||
| ) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3}, | |||||
| 40: {'lr': 1e-3}, | |||||
| 80: {'lr': 5e-4}, | |||||
| 100: {'lr': 1e-4}, | |||||
| 120: {'lr': 5e-5}, | |||||
| 140: {'lr': 1e-5} | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 512 * 4 * 4) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def vgg_cifar10_binary(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 10) | |||||
| return VGG_Cifar10(num_classes) | |||||
| @ -0,0 +1,198 @@ | |||||
| import torch | |||||
| import torchvision.transforms as transforms | |||||
| import random | |||||
| __imagenet_stats = {'mean': [0.485, 0.456, 0.406], | |||||
| 'std': [0.229, 0.224, 0.225]} | |||||
| __imagenet_pca = { | |||||
| 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), | |||||
| 'eigvec': torch.Tensor([ | |||||
| [-0.5675, 0.7192, 0.4009], | |||||
| [-0.5808, -0.0045, -0.8140], | |||||
| [-0.5836, -0.6948, 0.4203], | |||||
| ]) | |||||
| } | |||||
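| # ImageNet RGB covariance eigenvalues/eigenvectors used by the Lighting | |||||
| # (PCA color jitter) transform defined below. | |||||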
| def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
| t_list = [ | |||||
| transforms.CenterCrop(input_size), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize), | |||||
| ] | |||||
| if scale_size != input_size: | |||||
| t_list = [transforms.Scale(scale_size)] + t_list | |||||
| return transforms.Compose(t_list) | |||||
| def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
| t_list = [ | |||||
| transforms.RandomCrop(input_size), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize), | |||||
| ] | |||||
| if scale_size != input_size: | |||||
| t_list = [transforms.Scale(scale_size)] + t_list | |||||
| return transforms.Compose(t_list) | |||||
| def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
| padding = int((scale_size - input_size) / 2) | |||||
| return transforms.Compose([ | |||||
| transforms.RandomCrop(input_size, padding=padding), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize), | |||||
| ]) | |||||
| def inception_preproccess(input_size, normalize=__imagenet_stats): | |||||
| return transforms.Compose([ | |||||
| transforms.RandomSizedCrop(input_size), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize) | |||||
| ]) | |||||
| def inception_color_preproccess(input_size, normalize=__imagenet_stats): | |||||
| return transforms.Compose([ | |||||
| transforms.RandomSizedCrop(input_size), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| ColorJitter( | |||||
| brightness=0.4, | |||||
| contrast=0.4, | |||||
| saturation=0.4, | |||||
| ), | |||||
| Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']), | |||||
| transforms.Normalize(**normalize) | |||||
| ]) | |||||
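| # get_transform maps a dataset name to a preprocessing pipeline: Inception-style crops | |||||
| # for ImageNet, pad-and-random-crop for CIFAR/MNIST, and a plain scale + center crop | |||||
| # when augment=False. | |||||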
| def get_transform(name='imagenet', input_size=None, | |||||
| scale_size=None, normalize=None, augment=True): | |||||
| normalize = normalize or __imagenet_stats | |||||
| if name == 'imagenet': | |||||
| scale_size = scale_size or 256 | |||||
| input_size = input_size or 224 | |||||
| if augment: | |||||
| return inception_preproccess(input_size, normalize=normalize) | |||||
| else: | |||||
| return scale_crop(input_size=input_size, | |||||
| scale_size=scale_size, normalize=normalize) | |||||
| elif 'cifar' in name: | |||||
| input_size = input_size or 32 | |||||
| if augment: | |||||
| scale_size = scale_size or 40 | |||||
| return pad_random_crop(input_size, scale_size=scale_size, | |||||
| normalize=normalize) | |||||
| else: | |||||
| scale_size = scale_size or 32 | |||||
| return scale_crop(input_size=input_size, | |||||
| scale_size=scale_size, normalize=normalize) | |||||
| elif name == 'mnist': | |||||
| normalize = {'mean': [0.5], 'std': [0.5]} | |||||
| input_size = input_size or 28 | |||||
| if augment: | |||||
| scale_size = scale_size or 32 | |||||
| return pad_random_crop(input_size, scale_size=scale_size, | |||||
| normalize=normalize) | |||||
| else: | |||||
| scale_size = scale_size or 32 | |||||
| return scale_crop(input_size=input_size, | |||||
| scale_size=scale_size, normalize=normalize) | |||||
| class Lighting(object): | |||||
| """Lighting noise(AlexNet - style PCA - based noise)""" | |||||
| def __init__(self, alphastd, eigval, eigvec): | |||||
| self.alphastd = alphastd | |||||
| self.eigval = eigval | |||||
| self.eigvec = eigvec | |||||
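| # Adds alpha-weighted principal components to each image, with one alpha per | |||||
| # principal component drawn from N(0, alphastd). | |||||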
| def __call__(self, img): | |||||
| if self.alphastd == 0: | |||||
| return img | |||||
| alpha = img.new().resize_(3).normal_(0, self.alphastd) | |||||
| rgb = self.eigvec.type_as(img).clone()\ | |||||
| .mul(alpha.view(1, 3).expand(3, 3))\ | |||||
| .mul(self.eigval.view(1, 3).expand(3, 3))\ | |||||
| .sum(1).squeeze() | |||||
| return img.add(rgb.view(3, 1, 1).expand_as(img)) | |||||
| class Grayscale(object): | |||||
| def __call__(self, img): | |||||
| gs = img.clone() | |||||
| gs[0].mul_(0.299).add_(0.587, gs[1]).add_(0.114, gs[2]) | |||||
| gs[1].copy_(gs[0]) | |||||
| gs[2].copy_(gs[0]) | |||||
| return gs | |||||
| class Saturation(object): | |||||
| def __init__(self, var): | |||||
| self.var = var | |||||
| def __call__(self, img): | |||||
| gs = Grayscale()(img) | |||||
| alpha = random.uniform(0, self.var) | |||||
| return img.lerp(gs, alpha) | |||||
| class Brightness(object): | |||||
| def __init__(self, var): | |||||
| self.var = var | |||||
| def __call__(self, img): | |||||
| gs = img.new().resize_as_(img).zero_() | |||||
| alpha = random.uniform(0, self.var) | |||||
| return img.lerp(gs, alpha) | |||||
| class Contrast(object): | |||||
| def __init__(self, var): | |||||
| self.var = var | |||||
| def __call__(self, img): | |||||
| gs = Grayscale()(img) | |||||
| gs.fill_(gs.mean()) | |||||
| alpha = random.uniform(0, self.var) | |||||
| return img.lerp(gs, alpha) | |||||
| class RandomOrder(object): | |||||
| """ Composes several transforms together in random order. | |||||
| """ | |||||
| def __init__(self, transforms): | |||||
| self.transforms = transforms | |||||
| def __call__(self, img): | |||||
| if self.transforms is None: | |||||
| return img | |||||
| order = torch.randperm(len(self.transforms)) | |||||
| for i in order: | |||||
| img = self.transforms[i](img) | |||||
| return img | |||||
| class ColorJitter(RandomOrder): | |||||
| def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): | |||||
| self.transforms = [] | |||||
| if brightness != 0: | |||||
| self.transforms.append(Brightness(brightness)) | |||||
| if contrast != 0: | |||||
| self.transforms.append(Contrast(contrast)) | |||||
| if saturation != 0: | |||||
| self.transforms.append(Saturation(saturation)) | |||||
| @ -0,0 +1,5 @@ | |||||
| 2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||||
| 2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
| 2021-04-15 15:36:47 - INFO - creating model alexnet | |||||
| 2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
| 2021-04-15 15:36:48 - INFO - number of parameters: 61110184 | |||||
| @ -0,0 +1,5 @@ | |||||
| 2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||||
| 2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
| 2021-04-15 15:37:52 - INFO - creating model resnet | |||||
| 2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
| 2021-04-15 15:37:52 - INFO - number of parameters: 25557032 | |||||
| @ -0,0 +1,5 @@ | |||||
| 2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||||
| 2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
| 2021-04-15 15:38:16 - INFO - creating model alexnet | |||||
| 2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
| 2021-04-15 15:38:17 - INFO - number of parameters: 61110184 | |||||
| @ -0,0 +1,160 @@ | |||||
| import os | |||||
| import torch | |||||
| import logging.config | |||||
| import shutil | |||||
| import pandas as pd | |||||
| from bokeh.io import output_file, save, show | |||||
| from bokeh.plotting import figure | |||||
| from bokeh.layouts import column | |||||
| #from bokeh.charts import Line, defaults | |||||
| # | |||||
| #defaults.width = 800 | |||||
| #defaults.height = 400 | |||||
| #defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||||
| def setup_logging(log_file='log.txt'): | |||||
| """Setup logging configuration | |||||
| """ | |||||
| logging.basicConfig(level=logging.DEBUG, | |||||
| format="%(asctime)s - %(levelname)s - %(message)s", | |||||
| datefmt="%Y-%m-%d %H:%M:%S", | |||||
| filename=log_file, | |||||
| filemode='w') | |||||
| console = logging.StreamHandler() | |||||
| console.setLevel(logging.INFO) | |||||
| formatter = logging.Formatter('%(message)s') | |||||
| console.setFormatter(formatter) | |||||
| logging.getLogger('').addHandler(console) | |||||
| class ResultsLog(object): | |||||
| def __init__(self, path='results.csv', plot_path=None): | |||||
| self.path = path | |||||
| self.plot_path = plot_path or (self.path + '.html') | |||||
| self.figures = [] | |||||
| self.results = None | |||||
| def add(self, **kwargs): | |||||
| df = pd.DataFrame([kwargs.values()], columns=kwargs.keys()) | |||||
| if self.results is None: | |||||
| self.results = df | |||||
| else: | |||||
| self.results = self.results.append(df, ignore_index=True) | |||||
| def save(self, title='Training Results'): | |||||
| if len(self.figures) > 0: | |||||
| if os.path.isfile(self.plot_path): | |||||
| os.remove(self.plot_path) | |||||
| output_file(self.plot_path, title=title) | |||||
| plot = column(*self.figures) | |||||
| save(plot) | |||||
| self.figures = [] | |||||
| self.results.to_csv(self.path, index=False, index_label=False) | |||||
| def load(self, path=None): | |||||
| path = path or self.path | |||||
| if os.path.isfile(path): | |||||
| self.results = pd.read_csv(path) | |||||
| def show(self): | |||||
| if len(self.figures) > 0: | |||||
| plot = column(*self.figures) | |||||
| show(plot) | |||||
| #def plot(self, *kargs, **kwargs): | |||||
| # line = Line(data=self.results, *kargs, **kwargs) | |||||
| # self.figures.append(line) | |||||
| def image(self, *kargs, **kwargs): | |||||
| fig = figure() | |||||
| fig.image(*kargs, **kwargs) | |||||
| self.figures.append(fig) | |||||
| def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False): | |||||
| filename = os.path.join(path, filename) | |||||
| torch.save(state, filename) | |||||
| if is_best: | |||||
| shutil.copyfile(filename, os.path.join(path, 'model_best.pth.tar')) | |||||
| if save_all: | |||||
| shutil.copyfile(filename, os.path.join( | |||||
| path, 'checkpoint_epoch_%s.pth.tar' % state['epoch'])) | |||||
| class AverageMeter(object): | |||||
| """Computes and stores the average and current value""" | |||||
| def __init__(self): | |||||
| self.reset() | |||||
| def reset(self): | |||||
| self.val = 0 | |||||
| self.avg = 0 | |||||
| self.sum = 0 | |||||
| self.count = 0 | |||||
| def update(self, val, n=1): | |||||
| self.val = val | |||||
| self.sum += val * n | |||||
| self.count += n | |||||
| self.avg = self.sum / self.count | |||||
| __optimizers = { | |||||
| 'SGD': torch.optim.SGD, | |||||
| 'ASGD': torch.optim.ASGD, | |||||
| 'Adam': torch.optim.Adam, | |||||
| 'Adamax': torch.optim.Adamax, | |||||
| 'Adagrad': torch.optim.Adagrad, | |||||
| 'Adadelta': torch.optim.Adadelta, | |||||
| 'Rprop': torch.optim.Rprop, | |||||
| 'RMSprop': torch.optim.RMSprop | |||||
| } | |||||
| def adjust_optimizer(optimizer, epoch, config): | |||||
| """Reconfigures the optimizer according to epoch and config dict""" | |||||
| def modify_optimizer(optimizer, setting): | |||||
| if 'optimizer' in setting: | |||||
| optimizer = __optimizers[setting['optimizer']]( | |||||
| optimizer.param_groups) | |||||
| logging.debug('OPTIMIZER - setting method = %s' % | |||||
| setting['optimizer']) | |||||
| for param_group in optimizer.param_groups: | |||||
| for key in param_group.keys(): | |||||
| if key in setting: | |||||
| logging.debug('OPTIMIZER - setting %s = %s' % | |||||
| (key, setting[key])) | |||||
| param_group[key] = setting[key] | |||||
| return optimizer | |||||
| if callable(config): | |||||
| optimizer = modify_optimizer(optimizer, config(epoch)) | |||||
| else: | |||||
| for e in range(epoch + 1): # run over all epochs - sticky setting | |||||
| if e in config: | |||||
| optimizer = modify_optimizer(optimizer, config[e]) | |||||
| return optimizer | |||||
| def accuracy(output, target, topk=(1,)): | |||||
| """Computes the precision@k for the specified values of k""" | |||||
| maxk = max(topk) | |||||
| batch_size = target.size(0) | |||||
| _, pred = output.float().topk(maxk, 1, True, True) | |||||
| pred = pred.t() | |||||
| correct = pred.eq(target.view(1, -1).expand_as(pred)) | |||||
| res = [] | |||||
| for k in topk: | |||||
| correct_k = correct[:k].view(-1).float().sum(0) | |||||
| res.append(correct_k.mul_(100.0 / batch_size)) | |||||
| return res | |||||
| # kernel_img = model.features[0][0].kernel.data.clone() | |||||
| # kernel_img.add_(-kernel_img.min()) | |||||
| # kernel_img.mul_(255 / kernel_img.max()) | |||||
| # save_image(kernel_img, 'kernel%s.jpg' % epoch) | |||||
| @ -0,0 +1,8 @@ | |||||
| # BNN.pytorch | |||||
| Binarized Neural Network (BNN) for pytorch | |||||
| This is the PyTorch version of the BNN code, for VGG and ResNet models | |||||
| Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||||
| The code is based on https://github.com/eladhoffer/convNet.pytorch | |||||
| Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||||
| To run resnet18 for cifar10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10 | |||||
| @ -0,0 +1,37 @@ | |||||
| import os | |||||
| import torchvision.datasets as datasets | |||||
| import torchvision.transforms as transforms | |||||
| _DATASETS_MAIN_PATH = '/home/Datasets' | |||||
| _dataset_path = { | |||||
| 'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'), | |||||
| 'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'), | |||||
| 'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'), | |||||
| 'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'), | |||||
| 'imagenet': { | |||||
| 'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'), | |||||
| 'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val') | |||||
| } | |||||
| } | |||||
| def get_dataset(name, split='train', transform=None, | |||||
| target_transform=None, download=True): | |||||
| train = (split == 'train') | |||||
| if name == 'cifar10': | |||||
| return datasets.CIFAR10(root=_dataset_path['cifar10'], | |||||
| train=train, | |||||
| transform=transform, | |||||
| target_transform=target_transform, | |||||
| download=download) | |||||
| elif name == 'cifar100': | |||||
| return datasets.CIFAR100(root=_dataset_path['cifar100'], | |||||
| train=train, | |||||
| transform=transform, | |||||
| target_transform=target_transform, | |||||
| download=download) | |||||
| elif name == 'imagenet': | |||||
| path = _dataset_path[name][split] | |||||
| return datasets.ImageFolder(root=path, | |||||
| transform=transform, | |||||
| target_transform=target_transform) | |||||
| @ -0,0 +1,309 @@ | |||||
| import argparse | |||||
| import os | |||||
| import time | |||||
| import logging | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.parallel | |||||
| import torch.backends.cudnn as cudnn | |||||
| import torch.optim | |||||
| import torch.utils.data | |||||
| import models | |||||
| from torch.autograd import Variable | |||||
| from data import get_dataset | |||||
| from preprocess import get_transform | |||||
| from utils import * | |||||
| from datetime import datetime | |||||
| from ast import literal_eval | |||||
| from torchvision.utils import save_image | |||||
| model_names = sorted(name for name in models.__dict__ | |||||
| if name.islower() and not name.startswith("__") | |||||
| and callable(models.__dict__[name])) | |||||
| parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
| parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results', | |||||
| help='results dir') | |||||
| parser.add_argument('--save', metavar='SAVE', default='', | |||||
| help='saved folder') | |||||
| parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
| help='dataset name or folder') | |||||
| parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
| choices=model_names, | |||||
| help='model architecture: ' + | |||||
| ' | '.join(model_names) + | |||||
| ' (default: alexnet)') | |||||
| parser.add_argument('--input_size', type=int, default=None, | |||||
| help='image input size') | |||||
| parser.add_argument('--model_config', default='', | |||||
| help='additional architecture configuration') | |||||
| parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
| help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
| parser.add_argument('--gpus', default='0', | |||||
| help='gpus used for training - e.g 0,1,3') | |||||
| parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
| help='number of data loading workers (default: 8)') | |||||
| parser.add_argument('--epochs', default=2500, type=int, metavar='N', | |||||
| help='number of total epochs to run') | |||||
| parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
| help='manual epoch number (useful on restarts)') | |||||
| parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
| metavar='N', help='mini-batch size (default: 256)') | |||||
| parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
| help='optimizer function used') | |||||
| parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
| metavar='LR', help='initial learning rate') | |||||
| parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
| help='momentum') | |||||
| parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
| metavar='W', help='weight decay (default: 1e-4)') | |||||
| parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
| metavar='N', help='print frequency (default: 10)') | |||||
| parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
| help='path to latest checkpoint (default: none)') | |||||
| parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
| help='evaluate model FILE on validation set') | |||||
| def main(): | |||||
| global args, best_prec1 | |||||
| best_prec1 = 0 | |||||
| args = parser.parse_args() | |||||
| if args.evaluate: | |||||
| args.results_dir = '/tmp' | |||||
| if args.save == '': | |||||
| args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
| save_path = os.path.join(args.results_dir, args.save) | |||||
| if not os.path.exists(save_path): | |||||
| os.makedirs(save_path) | |||||
| setup_logging(os.path.join(save_path, 'log.txt')) | |||||
| results_file = os.path.join(save_path, 'results.%s') | |||||
| results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
| logging.info("saving to %s", save_path) | |||||
| logging.debug("run arguments: %s", args) | |||||
| if 'cuda' in args.type: | |||||
| args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
| torch.cuda.set_device(args.gpus[0]) | |||||
| cudnn.benchmark = True | |||||
| else: | |||||
| args.gpus = None | |||||
| # create model | |||||
| logging.info("creating model %s", args.model) | |||||
| model = models.__dict__[args.model] | |||||
| model_config = {'input_size': args.input_size, 'dataset': args.dataset} | |||||
| if args.model_config != '': | |||||
| model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
| model = model(**model_config) | |||||
| logging.info("created model with configuration: %s", model_config) | |||||
| # optionally resume from a checkpoint | |||||
| if args.evaluate: | |||||
| if not os.path.isfile(args.evaluate): | |||||
| parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
| checkpoint = torch.load(args.evaluate) | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| args.evaluate, checkpoint['epoch']) | |||||
| elif args.resume: | |||||
| checkpoint_file = args.resume | |||||
| if os.path.isdir(checkpoint_file): | |||||
| results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
| checkpoint_file = os.path.join( | |||||
| checkpoint_file, 'model_best.pth.tar') | |||||
| if os.path.isfile(checkpoint_file): | |||||
| logging.info("loading checkpoint '%s'", args.resume) | |||||
| checkpoint = torch.load(checkpoint_file) | |||||
| args.start_epoch = checkpoint['epoch'] - 1 | |||||
| best_prec1 = checkpoint['best_prec1'] | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| checkpoint_file, checkpoint['epoch']) | |||||
| else: | |||||
| logging.error("no checkpoint found at '%s'", args.resume) | |||||
| num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
| logging.info("number of parameters: %d", num_parameters) | |||||
| # Data loading code | |||||
| default_transform = { | |||||
| 'train': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=True), | |||||
| 'eval': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=False) | |||||
| } | |||||
| transform = getattr(model, 'input_transform', default_transform) | |||||
| regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
| 'lr': args.lr, | |||||
| 'momentum': args.momentum, | |||||
| 'weight_decay': args.weight_decay}}) | |||||
| # define loss function (criterion) and optimizer | |||||
| criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)() | |||||
| criterion.type(args.type) | |||||
| model.type(args.type) | |||||
| val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
| val_loader = torch.utils.data.DataLoader( | |||||
| val_data, | |||||
| batch_size=args.batch_size, shuffle=False, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| if args.evaluate: | |||||
| validate(val_loader, model, criterion, 0) | |||||
| return | |||||
| train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
| train_loader = torch.utils.data.DataLoader( | |||||
| train_data, | |||||
| batch_size=args.batch_size, shuffle=True, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
| logging.info('training regime: %s', regime) | |||||
| for epoch in range(args.start_epoch, args.epochs): | |||||
| optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
| # train for one epoch | |||||
| train_loss, train_prec1, train_prec5 = train( | |||||
| train_loader, model, criterion, epoch, optimizer) | |||||
| # evaluate on validation set | |||||
| val_loss, val_prec1, val_prec5 = validate( | |||||
| val_loader, model, criterion, epoch) | |||||
| # remember best prec@1 and save checkpoint | |||||
| is_best = val_prec1 > best_prec1 | |||||
| best_prec1 = max(val_prec1, best_prec1) | |||||
| save_checkpoint({ | |||||
| 'epoch': epoch + 1, | |||||
| 'model': args.model, | |||||
| 'config': args.model_config, | |||||
| 'state_dict': model.state_dict(), | |||||
| 'best_prec1': best_prec1, | |||||
| 'regime': regime | |||||
| }, is_best, path=save_path) | |||||
| logging.info('\n Epoch: {0}\t' | |||||
| 'Training Loss {train_loss:.4f} \t' | |||||
| 'Training Prec@1 {train_prec1:.3f} \t' | |||||
| 'Training Prec@5 {train_prec5:.3f} \t' | |||||
| 'Validation Loss {val_loss:.4f} \t' | |||||
| 'Validation Prec@1 {val_prec1:.3f} \t' | |||||
| 'Validation Prec@5 {val_prec5:.3f} \n' | |||||
| .format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_prec1=train_prec1, val_prec1=val_prec1, | |||||
| train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
| results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
| train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
| #results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
| # title='Loss', ylabel='loss') | |||||
| #results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
| # title='Error@1', ylabel='error %') | |||||
| #results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
| # title='Error@5', ylabel='error %') | |||||
| results.save() | |||||
| def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
| if args.gpus and len(args.gpus) > 1: | |||||
| model = torch.nn.DataParallel(model, args.gpus) | |||||
| batch_time = AverageMeter() | |||||
| data_time = AverageMeter() | |||||
| losses = AverageMeter() | |||||
| top1 = AverageMeter() | |||||
| top5 = AverageMeter() | |||||
| end = time.time() | |||||
| for i, (inputs, target) in enumerate(data_loader): | |||||
| # measure data loading time | |||||
| data_time.update(time.time() - end) | |||||
| if args.gpus is not None: | |||||
| target = target.cuda() | |||||
| if not training: | |||||
| with torch.no_grad(): | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| else: | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| loss = criterion(output, target_var) | |||||
| if type(output) is list: | |||||
| output = output[0] | |||||
| # measure accuracy and record loss | |||||
| prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
| losses.update(loss.item(), inputs.size(0)) | |||||
| top1.update(prec1.item(), inputs.size(0)) | |||||
| top5.update(prec5.item(), inputs.size(0)) | |||||
| if training: | |||||
| # compute gradient and do SGD step | |||||
| optimizer.zero_grad() | |||||
| loss.backward() | |||||
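| # BNN update: gradients are taken w.r.t. the binarized weights, but the optimizer | |||||
| # step is applied to the real-valued copies kept in p.org; after the step the | |||||
| # real-valued weights are clamped to [-1, 1] and stored back in p.org. | |||||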
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| # measure elapsed time | |||||
| batch_time.update(time.time() - end) | |||||
| end = time.time() | |||||
| if i % args.print_freq == 0: | |||||
| logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
| 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
| 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
| 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
| 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
| 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
| epoch, i, len(data_loader), | |||||
| phase='TRAINING' if training else 'EVALUATING', | |||||
| batch_time=batch_time, | |||||
| data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
| return losses.avg, top1.avg, top5.avg | |||||
| def train(data_loader, model, criterion, epoch, optimizer): | |||||
| # switch to train mode | |||||
| model.train() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=True, optimizer=optimizer) | |||||
| def validate(data_loader, model, criterion, epoch): | |||||
| # switch to evaluate mode | |||||
| model.eval() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=False, optimizer=None) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @ -0,0 +1,332 @@ | |||||
| import argparse | |||||
| import os | |||||
| import time | |||||
| import logging | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.parallel | |||||
| import torch.backends.cudnn as cudnn | |||||
| import torch.optim | |||||
| import torch.utils.data | |||||
| import models | |||||
| from torch.autograd import Variable | |||||
| from data import get_dataset | |||||
| from preprocess import get_transform | |||||
| from utils import * | |||||
| from datetime import datetime | |||||
| from ast import literal_eval | |||||
| from torchvision.utils import save_image | |||||
| from models.binarized_modules import HingeLoss | |||||
| model_names = sorted(name for name in models.__dict__ | |||||
| if name.islower() and not name.startswith("__") | |||||
| and callable(models.__dict__[name])) | |||||
| parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
| parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results', | |||||
| help='results dir') | |||||
| parser.add_argument('--save', metavar='SAVE', default='', | |||||
| help='saved folder') | |||||
| parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
| help='dataset name or folder') | |||||
| parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
| choices=model_names, | |||||
| help='model architecture: ' + | |||||
| ' | '.join(model_names) + | |||||
| ' (default: alexnet)') | |||||
| parser.add_argument('--input_size', type=int, default=None, | |||||
| help='image input size') | |||||
| parser.add_argument('--model_config', default='', | |||||
| help='additional architecture configuration') | |||||
| parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
| help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
| parser.add_argument('--gpus', default='0', | |||||
| help='gpus used for training - e.g 0,1,3') | |||||
| parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
| help='number of data loading workers (default: 8)') | |||||
| parser.add_argument('--epochs', default=900, type=int, metavar='N', | |||||
| help='number of total epochs to run') | |||||
| parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
| help='manual epoch number (useful on restarts)') | |||||
| parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
| metavar='N', help='mini-batch size (default: 256)') | |||||
| parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
| help='optimizer function used') | |||||
| parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
| metavar='LR', help='initial learning rate') | |||||
| parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
| help='momentum') | |||||
| parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
| metavar='W', help='weight decay (default: 1e-4)') | |||||
| parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
| metavar='N', help='print frequency (default: 10)') | |||||
| parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
| help='path to latest checkpoint (default: none)') | |||||
| parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
| help='evaluate model FILE on validation set') | |||||
| torch.cuda.random.manual_seed_all(10) | |||||
| output_dim = 0 | |||||
| def main(): | |||||
| global args, best_prec1, output_dim | |||||
| best_prec1 = 0 | |||||
| args = parser.parse_args() | |||||
| output_dim = {'cifar10': 10, 'cifar100':100, 'imagenet': 1000}[args.dataset] | |||||
| #import pdb; pdb.set_trace() | |||||
| #torch.save(args.batch_size/(len(args.gpus)/2+1),'multi_gpu_batch_size') | |||||
| if args.evaluate: | |||||
| args.results_dir = '/tmp' | |||||
| if args.save == '': | |||||
| args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
| save_path = os.path.join(args.results_dir, args.save) | |||||
| if not os.path.exists(save_path): | |||||
| os.makedirs(save_path) | |||||
| setup_logging(os.path.join(save_path, 'log.txt')) | |||||
| results_file = os.path.join(save_path, 'results.%s') | |||||
| results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
| logging.info("saving to %s", save_path) | |||||
| logging.debug("run arguments: %s", args) | |||||
| if 'cuda' in args.type: | |||||
| args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
| torch.cuda.set_device(args.gpus[0]) | |||||
| cudnn.benchmark = True | |||||
| else: | |||||
| args.gpus = None | |||||
| # create model | |||||
| logging.info("creating model %s", args.model) | |||||
| model = models.__dict__[args.model] | |||||
| model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim} | |||||
| if args.model_config != '': | |||||
| model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
| model = model(**model_config) | |||||
| logging.info("created model with configuration: %s", model_config) | |||||
| # optionally resume from a checkpoint | |||||
| if args.evaluate: | |||||
| if not os.path.isfile(args.evaluate): | |||||
| parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
| checkpoint = torch.load(args.evaluate) | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| args.evaluate, checkpoint['epoch']) | |||||
| elif args.resume: | |||||
| checkpoint_file = args.resume | |||||
| if os.path.isdir(checkpoint_file): | |||||
| results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
| checkpoint_file = os.path.join( | |||||
| checkpoint_file, 'model_best.pth.tar') | |||||
| if os.path.isfile(checkpoint_file): | |||||
| logging.info("loading checkpoint '%s'", args.resume) | |||||
| checkpoint = torch.load(checkpoint_file) | |||||
| args.start_epoch = checkpoint['epoch'] - 1 | |||||
| best_prec1 = checkpoint['best_prec1'] | |||||
| model.load_state_dict(checkpoint['state_dict']) | |||||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
| checkpoint_file, checkpoint['epoch']) | |||||
| else: | |||||
| logging.error("no checkpoint found at '%s'", args.resume) | |||||
| num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
| logging.info("number of parameters: %d", num_parameters) | |||||
| # Data loading code | |||||
| default_transform = { | |||||
| 'train': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=True), | |||||
| 'eval': get_transform(args.dataset, | |||||
| input_size=args.input_size, augment=False) | |||||
| } | |||||
| transform = getattr(model, 'input_transform', default_transform) | |||||
| regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
| 'lr': args.lr, | |||||
| 'momentum': args.momentum, | |||||
| 'weight_decay': args.weight_decay}}) | |||||
| # define loss function (criterion) and optimizer | |||||
| #criterion = getattr(model, 'criterion', nn.NLLLoss)() | |||||
| criterion = getattr(model, 'criterion', HingeLoss)() | |||||
| #criterion.type(args.type) | |||||
| model.type(args.type) | |||||
| val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
| val_loader = torch.utils.data.DataLoader( | |||||
| val_data, | |||||
| batch_size=args.batch_size, shuffle=False, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| if args.evaluate: | |||||
| validate(val_loader, model, criterion, 0) | |||||
| return | |||||
| train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
| train_loader = torch.utils.data.DataLoader( | |||||
| train_data, | |||||
| batch_size=args.batch_size, shuffle=True, | |||||
| num_workers=args.workers, pin_memory=True) | |||||
| optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
| logging.info('training regime: %s', regime) | |||||
| #import pdb; pdb.set_trace() | |||||
| #search_binarized_modules(model) | |||||
| for epoch in range(args.start_epoch, args.epochs): | |||||
| optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
| # train for one epoch | |||||
| train_loss, train_prec1, train_prec5 = train( | |||||
| train_loader, model, criterion, epoch, optimizer) | |||||
| # evaluate on validation set | |||||
| val_loss, val_prec1, val_prec5 = validate( | |||||
| val_loader, model, criterion, epoch) | |||||
| # remember best prec@1 and save checkpoint | |||||
| is_best = val_prec1 > best_prec1 | |||||
| best_prec1 = max(val_prec1, best_prec1) | |||||
| save_checkpoint({ | |||||
| 'epoch': epoch + 1, | |||||
| 'model': args.model, | |||||
| 'config': args.model_config, | |||||
| 'state_dict': model.state_dict(), | |||||
| 'best_prec1': best_prec1, | |||||
| 'regime': regime | |||||
| }, is_best, path=save_path) | |||||
| logging.info('\n Epoch: {0}\t' | |||||
| 'Training Loss {train_loss:.4f} \t' | |||||
| 'Training Prec@1 {train_prec1:.3f} \t' | |||||
| 'Training Prec@5 {train_prec5:.3f} \t' | |||||
| 'Validation Loss {val_loss:.4f} \t' | |||||
| 'Validation Prec@1 {val_prec1:.3f} \t' | |||||
| 'Validation Prec@5 {val_prec5:.3f} \n' | |||||
| .format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_prec1=train_prec1, val_prec1=val_prec1, | |||||
| train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
| results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
| train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
| train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
| #results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
| #             title='Loss', ylabel='loss') | |||||
| #results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
| #             title='Error@1', ylabel='error %') | |||||
| #results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
| #             title='Error@5', ylabel='error %') | |||||
| results.save() | |||||
| def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
| if args.gpus and len(args.gpus) > 1: | |||||
| model = torch.nn.DataParallel(model, args.gpus) | |||||
| batch_time = AverageMeter() | |||||
| data_time = AverageMeter() | |||||
| losses = AverageMeter() | |||||
| top1 = AverageMeter() | |||||
| top5 = AverageMeter() | |||||
| end = time.time() | |||||
| for i, (inputs, target) in enumerate(data_loader): | |||||
| # measure data loading time | |||||
| data_time.update(time.time() - end) | |||||
| if args.gpus is not None: | |||||
| target = target.cuda() | |||||
| #import pdb; pdb.set_trace() | |||||
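| # HingeLoss expects targets in {-1, +1}: build a one-hot matrix filled with -1 | |||||
| # and scatter +1 at the true class index. | |||||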
| if criterion.__class__.__name__=='HingeLoss': | |||||
| target=target.unsqueeze(1) | |||||
| target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim) | |||||
| target_onehot.fill_(-1) | |||||
| target_onehot.scatter_(1, target, 1) | |||||
| target=target.squeeze() | |||||
| if not training: | |||||
| with torch.no_grad(): | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target_onehot) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| else: | |||||
| input_var = Variable(inputs.type(args.type)) | |||||
| target_var = Variable(target_onehot) | |||||
| # compute output | |||||
| output = model(input_var) | |||||
| #import pdb; pdb.set_trace() | |||||
| loss = criterion(output, target_onehot) | |||||
| #import pdb; pdb.set_trace() | |||||
| if type(output) is list: | |||||
| output = output[0] | |||||
| # measure accuracy and record loss | |||||
| prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
| losses.update(loss.item(), inputs.size(0)) | |||||
| top1.update(prec1.item(), inputs.size(0)) | |||||
| top5.update(prec5.item(), inputs.size(0)) | |||||
| #import pdb; pdb.set_trace() | |||||
| #if not training and top1.avg<15: | |||||
| # import pdb; pdb.set_trace() | |||||
| if training: | |||||
| # compute gradient and do SGD step | |||||
| optimizer.zero_grad() | |||||
| # add backward hook | |||||
| loss.backward() | |||||
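| # Same binarized-weight update as in main_binary.py: restore the real-valued | |||||
| # weights (p.org) before the optimizer step, then clamp them back to [-1, 1]. | |||||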
| for p in list(model.parameters()): | |||||
| #import pdb; pdb.set_trace() | |||||
| if hasattr(p,'org'): | |||||
| #print('before:', p[0][0]) | |||||
| #gm=max(p.grad.data.max(),-p.grad.data.min()) | |||||
| #p.grad=p.grad.div(gm+1) | |||||
| p.data.copy_(p.org) | |||||
| #print('after:', p[0][0]) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| #import pdb; pdb.set_trace() | |||||
| if hasattr(p,'org'): | |||||
| #print('before:', p[0][0]) | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| #if epoch>30: | |||||
| # import pdb; pdb.set_trace() | |||||
| # measure elapsed time | |||||
| batch_time.update(time.time() - end) | |||||
| end = time.time() | |||||
| if i % args.print_freq == 0: | |||||
| logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
| 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
| 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
| 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
| 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
| 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
| epoch, i, len(data_loader), | |||||
| phase='TRAINING' if training else 'EVALUATING', | |||||
| batch_time=batch_time, | |||||
| data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
| return losses.avg, top1.avg, top5.avg | |||||
| def train(data_loader, model, criterion, epoch, optimizer): | |||||
| # switch to train mode | |||||
| model.train() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=True, optimizer=optimizer) | |||||
| def validate(data_loader, model, criterion, epoch): | |||||
| # switch to evaluate mode | |||||
| model.eval() | |||||
| return forward(data_loader, model, criterion, epoch, | |||||
| training=False, optimizer=None) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @ -0,0 +1,150 @@ | |||||
| from __future__ import print_function | |||||
| import argparse | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.functional as F | |||||
| import torch.optim as optim | |||||
| from torchvision import datasets, transforms | |||||
| from torch.autograd import Variable | |||||
| from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| from models.binarized_modules import Binarize,HingeLoss | |||||
| # Training settings | |||||
| parser = argparse.ArgumentParser(description='PyTorch MNIST Example') | |||||
| parser.add_argument('--batch-size', type=int, default=64, metavar='N', | |||||
| help='input batch size for training (default: 64)') | |||||
| parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', | |||||
| help='input batch size for testing (default: 1000)') | |||||
| parser.add_argument('--epochs', type=int, default=100, metavar='N', | |||||
| help='number of epochs to train (default: 100)') | |||||
| parser.add_argument('--lr', type=float, default=0.01, metavar='LR', | |||||
| help='learning rate (default: 0.01)') | |||||
| parser.add_argument('--momentum', type=float, default=0.5, metavar='M', | |||||
| help='SGD momentum (default: 0.5)') | |||||
| parser.add_argument('--no-cuda', action='store_true', default=False, | |||||
| help='disables CUDA training') | |||||
| parser.add_argument('--seed', type=int, default=1, metavar='S', | |||||
| help='random seed (default: 1)') | |||||
| parser.add_argument('--gpus', default=3, | |||||
| help='gpus used for training - e.g 0,1,3') | |||||
| parser.add_argument('--log-interval', type=int, default=10, metavar='N', | |||||
| help='how many batches to wait before logging training status') | |||||
| args = parser.parse_args() | |||||
| args.cuda = not args.no_cuda and torch.cuda.is_available() | |||||
| torch.manual_seed(args.seed) | |||||
| if args.cuda: | |||||
| torch.cuda.manual_seed(args.seed) | |||||
| kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} | |||||
| train_loader = torch.utils.data.DataLoader( | |||||
| datasets.MNIST('../data', train=True, download=True, | |||||
| transform=transforms.Compose([ | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize((0.1307,), (0.3081,)) | |||||
| ])), | |||||
| batch_size=args.batch_size, shuffle=True, **kwargs) | |||||
| test_loader = torch.utils.data.DataLoader( | |||||
| datasets.MNIST('../data', train=False, transform=transforms.Compose([ | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize((0.1307,), (0.3081,)) | |||||
| ])), | |||||
| batch_size=args.test_batch_size, shuffle=True, **kwargs) | |||||
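| # Binarized MLP for MNIST: three BinarizeLinear hidden layers (widened by infl_ratio) | |||||
| # with BatchNorm + Hardtanh activations, followed by a real-valued output layer and LogSoftmax. | |||||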
| class Net(nn.Module): | |||||
| def __init__(self): | |||||
| super(Net, self).__init__() | |||||
| self.infl_ratio=3 | |||||
| self.fc1 = BinarizeLinear(784, 2048*self.infl_ratio) | |||||
| self.htanh1 = nn.Hardtanh() | |||||
| self.bn1 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
| self.fc2 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
| self.htanh2 = nn.Hardtanh() | |||||
| self.bn2 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
| self.fc3 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
| self.htanh3 = nn.Hardtanh() | |||||
| self.bn3 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
| self.fc4 = nn.Linear(2048*self.infl_ratio, 10) | |||||
| self.logsoftmax = nn.LogSoftmax(dim=1) | |||||
| self.drop=nn.Dropout(0.5) | |||||
| def forward(self, x): | |||||
| x = x.view(-1, 28*28) | |||||
| x = self.fc1(x) | |||||
| x = self.bn1(x) | |||||
| x = self.htanh1(x) | |||||
| x = self.fc2(x) | |||||
| x = self.bn2(x) | |||||
| x = self.htanh2(x) | |||||
| x = self.fc3(x) | |||||
| x = self.drop(x) | |||||
| x = self.bn3(x) | |||||
| x = self.htanh3(x) | |||||
| x = self.fc4(x) | |||||
| return self.logsoftmax(x) | |||||
| model = Net() | |||||
| if args.cuda: | |||||
| torch.cuda.set_device(3) | |||||
| model.cuda() | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| optimizer = optim.Adam(model.parameters(), lr=args.lr) | |||||
| def train(epoch): | |||||
| model.train() | |||||
| for batch_idx, (data, target) in enumerate(train_loader): | |||||
| if args.cuda: | |||||
| data, target = data.cuda(), target.cuda() | |||||
| data, target = Variable(data), Variable(target) | |||||
| optimizer.zero_grad() | |||||
| output = model(data) | |||||
| loss = criterion(output, target) | |||||
| loss.backward() | |||||
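| # Restore the real-valued weights (p.org) before the optimizer step, then clamp | |||||
| # the updated weights to [-1, 1] (BNN training scheme). | |||||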
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| if batch_idx % args.log_interval == 0: | |||||
| print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( | |||||
| epoch, batch_idx * len(data), len(train_loader.dataset), | |||||
| 100. * batch_idx / len(train_loader), loss.item())) | |||||
| def test(): | |||||
| model.eval() | |||||
| test_loss = 0 | |||||
| correct = 0 | |||||
| with torch.no_grad(): | |||||
| for data, target in test_loader: | |||||
| if args.cuda: | |||||
| data, target = data.cuda(), target.cuda() | |||||
| data, target = Variable(data), Variable(target) | |||||
| output = model(data) | |||||
| test_loss += criterion(output, target).item() # sum up batch loss | |||||
| pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability | |||||
| correct += pred.eq(target.data.view_as(pred)).cpu().sum() | |||||
| test_loss /= len(test_loader.dataset) | |||||
| print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( | |||||
| test_loss, correct, len(test_loader.dataset), | |||||
| 100. * correct / len(test_loader.dataset))) | |||||
| for epoch in range(1, args.epochs + 1): | |||||
| train(epoch) | |||||
| test() | |||||
| if epoch%40==0: | |||||
| optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1 | |||||
| @ -0,0 +1,6 @@ | |||||
| from .alexnet import * | |||||
| from .alexnet_binary import * | |||||
| from .resnet import * | |||||
| from .resnet_binary import * | |||||
| from .vgg_cifar10_binary import * | |||||
| @ -0,0 +1,78 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| __all__ = ['alexnet'] | |||||
| class AlexNetOWT_BN(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(AlexNetOWT_BN, self).__init__() | |||||
| self.features = nn.Sequential( | |||||
| nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, | |||||
| bias=False), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(64), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(192), | |||||
| nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(384), | |||||
| nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256) | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| nn.Linear(256 * 6 * 6, 4096, bias=False), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(4096, 4096, bias=False), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(4096, num_classes) | |||||
| ) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
| 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
| 10: {'lr': 5e-3}, | |||||
| 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 20: {'lr': 5e-4}, | |||||
| 25: {'lr': 1e-4} | |||||
| } | |||||
| normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
| std=[0.229, 0.224, 0.225]) | |||||
| self.input_transform = { | |||||
| 'train': transforms.Compose([ | |||||
| transforms.Scale(256), | |||||
| transforms.RandomCrop(224), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]), | |||||
| 'eval': transforms.Compose([ | |||||
| transforms.Scale(256), | |||||
| transforms.CenterCrop(224), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]) | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 256 * 6 * 6) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def alexnet(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 1000) | |||||
| return AlexNetOWT_BN(num_classes) | |||||
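| # usage sketch (illustrative): the model expects 224x224 RGB inputs, e.g. | |||||
| #   model = alexnet(num_classes=1000) | |||||
| #   logits = model(torch.randn(2, 3, 224, 224))   # -> shape (2, 1000) | |||||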
| @ -0,0 +1,92 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| __all__ = ['alexnet_binary'] | |||||
| class AlexNetOWT_BN(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(AlexNetOWT_BN, self).__init__() | |||||
| self.ratioInfl=3 | |||||
| self.features = nn.Sequential( | |||||
| BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(int(64*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(int(192*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1), | |||||
| nn.BatchNorm2d(int(384*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1), | |||||
| nn.BatchNorm2d(int(256*self.ratioInfl)), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1), | |||||
| nn.MaxPool2d(kernel_size=3, stride=2), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Hardtanh(inplace=True) | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| BinarizeLinear(256 * 6 * 6, 4096), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(4096, 4096), | |||||
| nn.BatchNorm1d(4096), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(4096, num_classes), | |||||
| nn.BatchNorm1d(num_classes), | |||||
| nn.LogSoftmax() | |||||
| ) | |||||
| #self.regime = { | |||||
| # 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
| # 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
| # 10: {'lr': 5e-3}, | |||||
| # 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| # 20: {'lr': 5e-4}, | |||||
| # 25: {'lr': 1e-4} | |||||
| #} | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
| 20: {'lr': 1e-3}, | |||||
| 30: {'lr': 5e-4}, | |||||
| 35: {'lr': 1e-4}, | |||||
| 40: {'lr': 1e-5} | |||||
| } | |||||
| normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
| std=[0.229, 0.224, 0.225]) | |||||
| self.input_transform = { | |||||
| 'train': transforms.Compose([ | |||||
| transforms.Scale(256), | |||||
| transforms.RandomCrop(224), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]), | |||||
| 'eval': transforms.Compose([ | |||||
| transforms.Scale(256), | |||||
| transforms.CenterCrop(224), | |||||
| transforms.ToTensor(), | |||||
| normalize | |||||
| ]) | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 256 * 6 * 6) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def alexnet_binary(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 1000) | |||||
| return AlexNetOWT_BN(num_classes) | |||||
| @ -0,0 +1,423 @@ | |||||
| import torch | |||||
| import pdb | |||||
| import torch.nn as nn | |||||
| import math | |||||
| from torch.autograd import Variable | |||||
| from torch.autograd import Function | |||||
| from decimal import Decimal, ROUND_HALF_UP | |||||
| import numpy as np | |||||
| def Binarize(tensor,quant_mode='det'): | |||||
| if quant_mode=='det': | |||||
| # deterministic binarization: the sign of each value (+1 or -1) | |||||
| return tensor.sign() | |||||
| else: | |||||
| # stochastic binarization: +1 with probability (x+1)/2, via added uniform noise | |||||
| return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) | |||||
| class HingeLoss(nn.Module): | |||||
| def __init__(self): | |||||
| super(HingeLoss,self).__init__() | |||||
| self.margin=1.0 | |||||
| def hinge_loss(self,input,target): | |||||
| #import pdb; pdb.set_trace() | |||||
| output=self.margin-input.mul(target) | |||||
| output[output.le(0)]=0 | |||||
| return output.mean() | |||||
| def forward(self, input, target): | |||||
| return self.hinge_loss(input,target) | |||||
| class SqrtHingeLossFunction(Function): | |||||
| def __init__(self): | |||||
| super(SqrtHingeLossFunction,self).__init__() | |||||
| self.margin=1.0 | |||||
| def forward(self, input, target): | |||||
| output=self.margin-input.mul(target) | |||||
| output[output.le(0)]=0 | |||||
| self.save_for_backward(input, target) | |||||
| loss=output.mul(output).sum(0).sum(1).div(target.numel()) | |||||
| return loss | |||||
| def backward(self,grad_output): | |||||
| input, target = self.saved_tensors | |||||
| output=self.margin-input.mul(target) | |||||
| output[output.le(0)]=0 | |||||
| # import pdb; pdb.set_trace()  # debug breakpoint; would halt the backward pass if active | |||||
| grad_output.resize_as_(input).copy_(target).mul_(-2).mul_(output) | |||||
| grad_output.mul_(output.ne(0).float()) | |||||
| grad_output.div_(input.numel()) | |||||
| return grad_output,grad_output | |||||
| def Quantize(tensor,quant_mode='det', params=None, numBits=8): | |||||
| tensor.clamp_(-2**(numBits-1),2**(numBits-1)) | |||||
| if quant_mode=='det': | |||||
| tensor=tensor.mul(2**(numBits-1)).round().div(2**(numBits-1)) | |||||
| else: | |||||
| tensor=tensor.mul(2**(numBits-1)).round().add(torch.rand(tensor.size()).add(-0.5)).div(2**(numBits-1)) | |||||
| quant_fixed(tensor, params) | |||||
| return tensor | |||||
| #import torch.nn._functions as tnnf | |||||
| class BinarizeLinear(nn.Linear): | |||||
| def __init__(self, *kargs, **kwargs): | |||||
| super(BinarizeLinear, self).__init__(*kargs, **kwargs) | |||||
| def forward(self, input): | |||||
| # if input.size(1) != 784: | |||||
| # input.data=Binarize(input.data) | |||||
| if not hasattr(self.weight,'org'): | |||||
| self.weight.org=self.weight.data.clone() | |||||
| self.weight.data=Binarize(self.weight.org) | |||||
| out = nn.functional.linear(input, self.weight) | |||||
| if not self.bias is None: | |||||
| self.bias.org=self.bias.data.clone() | |||||
| out += self.bias.view(1, -1).expand_as(out) | |||||
| return out | |||||
| class BinarizeConv2d(nn.Conv2d): | |||||
| def __init__(self, *kargs, **kwargs): | |||||
| super(BinarizeConv2d, self).__init__(*kargs, **kwargs) | |||||
| def forward(self, input): | |||||
| # if input.size(1) != 3: | |||||
| # input.data = Binarize(input.data) | |||||
| if not hasattr(self.weight,'org'): | |||||
| self.weight.org=self.weight.data.clone() | |||||
| self.weight.data=Binarize(self.weight.org) | |||||
| #input = torch.round(input) | |||||
| #input = input*2-1 | |||||
| #scale = max(torch.max(input), -torch.min(input)) / 63 | |||||
| #input = torch.round(input*2 / scale) - 63 | |||||
| #if scale != 0: | |||||
| # input = torch.round(input / scale) | |||||
| #print (torch.max(input)) | |||||
| #print(input) | |||||
| input = torch.round(input) | |||||
| #print(input) | |||||
| #print (torch.max(input)) | |||||
| out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
| self.padding, self.dilation, self.groups) | |||||
| #print (torch.min(out), torch.max(out)) | |||||
| #out = torch.round(out) | |||||
| #print (torch.min(out), torch.max(out)) | |||||
| #print (torch.min(input), torch.max(input)) | |||||
| #out = torch.round(out / 64 * 36 / 64) | |||||
| #print (self.weight.size()[1]) | |||||
| #if self.weight.size()[1] >= 16 and self.weight.size()[1] <= 24: | |||||
| if self.weight.size()[1] >= 4 and self.weight.size()[2] * self.weight.size()[3] == 9: | |||||
| out = torch.round(out / 64 * 36 / 64) | |||||
| elif self.weight.size()[1] == 1: | |||||
| out = torch.round(out * 7 / 64) | |||||
| else: | |||||
| out = torch.round(out / 64) | |||||
| out = out * 4 | |||||
| out[out > 63] = 63 | |||||
| out[out < -63] = -63 | |||||
| #out = out - torch.round(torch.mean(out)) | |||||
| # out = out*4 | |||||
| #out[out > 63] = 63 | |||||
| #out[out < -63] = -63 | |||||
| #else: | |||||
| # out = torch.round(out * 10 / 64) | |||||
| #print (torch.min(out), torch.max(out)) | |||||
| # if not self.bias is None: | |||||
| # self.bias.org=self.bias.data.clone() | |||||
| # out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||||
| return out | |||||
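| # summary of the forward pass above: weights are sign-binarized (the full- | |||||
| # precision copy stays in weight.org), inputs are rounded to integers, and the | |||||
| # convolution output is rescaled to mimic a fixed-point CIM macro: /64 (with an | |||||
| # extra 36/64 factor for 3x3 kernels with >=4 input channels, or *7/64 for | |||||
| # single-channel inputs), then *4 and clipped to [-63, 63] | |||||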
| class IdealCimConv2d(nn.Conv2d): | |||||
| def __init__(self, *kargs, **kwargs): | |||||
| super(IdealCimConv2d, self).__init__(*kargs, **kwargs) | |||||
| def forward(self, input): | |||||
| # if input.size(1) != 3: | |||||
| # input.data = Binarize(input.data) | |||||
| if not hasattr(self.weight,'org'): | |||||
| self.weight.org=self.weight.data.clone() | |||||
| self.weight.data=Binarize(self.weight.org) | |||||
| #input = torch.round(input) | |||||
| #input = input*2-1 | |||||
| #scale = max(torch.max(input), -torch.min(input)) / 63 | |||||
| #input = torch.round(input*2 / scale) - 63 | |||||
| #if scale != 0: | |||||
| # input = torch.round(input / scale) | |||||
| #print (torch.max(input)) | |||||
| #print(input) | |||||
| input = torch.round(input) | |||||
| #print(input) | |||||
| #print (torch.max(input)) | |||||
| out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
| self.padding, self.dilation, self.groups) | |||||
| out = out / 64 | |||||
| out = out * 4 | |||||
| out[out > 63] = 63 | |||||
| out[out < -63] = -63 | |||||
| return out | |||||
| device = 'cuda:0' | |||||
| ''' | |||||
| H = [1024, 512] | |||||
| sim_model = torch.nn.Sequential( | |||||
| torch.nn.Linear(36, H[0]), | |||||
| torch.nn.Dropout(p=0.5), | |||||
| torch.nn.ReLU(), | |||||
| torch.nn.Linear(H[0], H[1]), | |||||
| torch.nn.Dropout(p=0.5), | |||||
| torch.nn.ReLU(), | |||||
| torch.nn.Linear(H[-1], 1), | |||||
| ) | |||||
| sim_model.load_state_dict(torch.load('model_error.ckpt', map_location=torch.device('cuda:0'))) | |||||
| sim_model = sim_model.to(device) | |||||
| sim_model.eval() | |||||
| ''' | |||||
| class CimSimConv2d(nn.Conv2d): | |||||
| def __init__(self, *kargs, **kwargs): | |||||
| super(CimSimConv2d, self).__init__(*kargs, **kwargs) | |||||
| self.device = device | |||||
| def forward(self, input): | |||||
| if not hasattr(self.weight,'org'): | |||||
| self.weight.org=self.weight.data.clone() | |||||
| self.weight.data=Binarize(self.weight.org) | |||||
| #scale = max(torch.max(input), -torch.min(input)) / 63 | |||||
| #if scale != 0: | |||||
| # input = torch.round(input / scale) | |||||
| #''' random error | |||||
| #out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
| # self.padding, self.dilation, self.groups) | |||||
| #out = torch.round(out / 64 * 36 / 64) | |||||
| #randrange = (self.weight.size()[1] // 4) | |||||
| #for _ in range(randrange): | |||||
| # out += torch.randint(-1, 1, out.size(), device=device) | |||||
| #out[out>63] = 63 | |||||
| #out[out<-63] -63 | |||||
| #''' | |||||
| input = torch.round(input) | |||||
| out2 = self.simconv(input, self.weight) | |||||
| ''' | |||||
| if torch.max(out2) < 32: | |||||
| out2 = out2 * 2 | |||||
| if torch.max(out2) < 32: | |||||
| out2 = out2 * 2 | |||||
| if torch.max(out2) < 32: | |||||
| out2 = out2 * 2 | |||||
| ''' | |||||
| out2 = out2 * 4 | |||||
| out2[out2 > 63] = 63 | |||||
| out2[out2 < -63] = -63 | |||||
| #print (self.weight.data.size()) | |||||
| #print (torch.max(out2), torch.min(out2)) | |||||
| #print (torch.max(out-out2), torch.min(out-out2)) | |||||
| #out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
| # self.padding, self.dilation, self.groups) | |||||
| #print(input.size(), self.weight.size(), out.size()) | |||||
| #if not self.bias is None: | |||||
| # self.bias.org=self.bias.data.clone() | |||||
| # out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||||
| return out2 | |||||
| def simconv(self, input_a, weight): | |||||
| #print(input_a.size(), weight.size()) | |||||
| batch_size = input_a.size()[0] | |||||
| out_channel = weight.size()[0] | |||||
| out_width = input_a.size()[2] - 2 * (weight.size()[2] // 2) | |||||
| out_height = input_a.size()[3] - 2 * (weight.size()[3] // 2) | |||||
| simout = torch.zeros(batch_size, out_channel, out_width, out_height, dtype = input_a.dtype).to(device) | |||||
| first = True | |||||
| #''' Mapping Table | |||||
| if weight.size()[2] == 7: | |||||
| kernel_group = 1 | |||||
| else: | |||||
| kernel_group = 4 | |||||
| Digital_input_split = torch.split(input_a, kernel_group, dim=1) | |||||
| binary_weight_split = torch.split(weight, kernel_group, dim=1) | |||||
| for i in range(len(Digital_input_split)): | |||||
| temp_output = nn.functional.conv2d(Digital_input_split[i], binary_weight_split[i], None, self.stride, self.padding, self.dilation, self.groups) | |||||
| #temp_output = torch.round(temp_output / 64 * 36 / 64) | |||||
| temp_output = torch.round(temp_output / 64) | |||||
| temp_output = Mapping.apply(temp_output) | |||||
| simout += temp_output + 2 | |||||
| #print (torch.max(simout), torch.min(simout)) | |||||
| #''' | |||||
| ''' Error model | |||||
| for n in range(batch_size): | |||||
| for c in range(out_channel): | |||||
| w = torch.reshape(weight[c], (-1,)).to(device) | |||||
| inputs = [] | |||||
| for i in range(out_width): | |||||
| for j in range(out_height): | |||||
| input = torch.reshape(input_a[n, :, i: i + weight.size()[2], j: j + weight.size()[3]], (-1,)) | |||||
| #print (w.size(), input.size()) | |||||
| # simout[n][c][i][j] = sum(w*input) | |||||
| # TODO | |||||
| simout[n][c][i][j] = self.cim_conv_tmp(input, w) | |||||
| #''' | |||||
| #print (len(input)) | |||||
| #print (simout.size()) | |||||
| # out = nn.functional.conv2d(input_a, weight) | |||||
| return simout | |||||
| def cim_conv_tmp(self, input, weight): | |||||
| assert len(input) == len(weight) | |||||
| raw_sum = 0 | |||||
| if len(weight) == 3: | |||||
| for i in range((len(input)-1) // 36 + 1): | |||||
| data_x = input[i*36:i*36+36] * weight[i*36:i*36+36] | |||||
| row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP)) | |||||
| #''' Error model | |||||
| if len(data_x) < 36: | |||||
| data_x = torch.cat((data_x, torch.zeros(36 - len(data_x), dtype=data_x.dtype))) | |||||
| try: | |||||
| #ensor_x = torch.Tensor(data_x).to(self.device) | |||||
| tensor_x = data_x.to(device) | |||||
| except: | |||||
| print(data_x, len(data_x)) | |||||
| y_pred = sim_model(tensor_x) | |||||
| if int(y_pred[0]) > 10: | |||||
| adjust = 10 | |||||
| elif int(y_pred[0]) < -10: | |||||
| adjust = -10 | |||||
| else: | |||||
| adjust = int(y_pred[0]) | |||||
| #print (tensor_x, y_pred) | |||||
| raw_sum += (row + adjust + 2) | |||||
| #''' | |||||
| #if row in self.mappingTable: | |||||
| # row = self.mappingTable[row] | |||||
| #raw_sum += row | |||||
| #raw_sum += row | |||||
| else: | |||||
| for i in range((len(input)-1) // 49 + 1): | |||||
| data_x = input[i*49:i*49+49] * weight[i*49:i*49+49] | |||||
| row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP)) | |||||
| #''' Error model | |||||
| if len(data_x) < 49: | |||||
| data_x = torch.cat((data_x, torch.zeros(49 - len(data_x), dtype=data_x.dtype))) | |||||
| try: | |||||
| #ensor_x = torch.Tensor(data_x).to(self.device) | |||||
| tensor_x = data_x.to(device) | |||||
| except: | |||||
| print(data_x, len(data_x)) | |||||
| y_pred = sim_model(tensor_x) | |||||
| if int(y_pred[0]) > 10: | |||||
| adjust = 10 | |||||
| elif int(y_pred[0]) < -10: | |||||
| adjust = -10 | |||||
| else: | |||||
| adjust = int(y_pred[0]) | |||||
| #print (tensor_x, y_pred) | |||||
| raw_sum += (row + adjust + 2) | |||||
| #print (raw_sum) | |||||
| return raw_sum | |||||
| class Mapping(torch.autograd.Function): | |||||
| @staticmethod | |||||
| def forward(ctx, input): | |||||
| output = input.clone() | |||||
| output[input==-1] = -4 | |||||
| output[input==-2] = -5 | |||||
| output[input==-3] = -6 | |||||
| output[input==-4] = -7 | |||||
| output[input==-5] = -9 | |||||
| output[input==-6] = -9 | |||||
| output[input==-7] = -11 | |||||
| output[input==-8] = -11 | |||||
| output[input==-9] = -13 | |||||
| output[input==-10] = -13 | |||||
| output[input==-11] = -17 | |||||
| output[input==-12] = -17 | |||||
| output[input==-13] = -17 | |||||
| output[input==-14] = -19 | |||||
| output[input==-15] = -19 | |||||
| output[input==-16] = -21 | |||||
| output[input==-17] = -21 | |||||
| output[input==-18] = -23 | |||||
| output[input==-19] = -25 | |||||
| output[input==-20] = -25 | |||||
| output[input==-21] = -25 | |||||
| output[input==-22] = -25 | |||||
| output[input==-23] = -27 | |||||
| output[input==-24] = -27 | |||||
| output[input==-25] = -29 | |||||
| output[input==-26] = -29 | |||||
| output[input==-27] = -29 | |||||
| output[input==-28] = -31 | |||||
| output[input==-29] = -31 | |||||
| output[input==-30] = -33 | |||||
| output[input==-31] = -33 | |||||
| output[input==-32] = -35 | |||||
| output[input==-33] = -35 | |||||
| output[input==-34] = -35 | |||||
| #output[input==-35] = -35 | |||||
| output[input==0] = -2 | |||||
| output[input==1] = -1 | |||||
| output[input==2] = 1 | |||||
| output[input==3] = 2 | |||||
| #output[input==4] = 4 | |||||
| output[input==5] = 4 | |||||
| #output[input==6] = 6 | |||||
| output[input==7] = 8 | |||||
| #output[input==8] = 8 | |||||
| output[input==9] = 10 | |||||
| #output[input==10] = 10 | |||||
| output[input==11] = 12 | |||||
| #output[input==12] = 12 | |||||
| output[input==13] = 16 | |||||
| output[input==14] = 16 | |||||
| output[input==15] = 16 | |||||
| #output[input==16] = 16 | |||||
| output[input==17] = 18 | |||||
| output[input==18] = 20 | |||||
| output[input==19] = 20 | |||||
| output[input==20] = 24 | |||||
| output[input==21] = 24 | |||||
| output[input==22] = 24 | |||||
| output[input==23] = 26 | |||||
| output[input==24] = 26 | |||||
| output[input==25] = 28 | |||||
| output[input==26] = 28 | |||||
| output[input==27] = 28 | |||||
| output[input==28] = 30 | |||||
| output[input==29] = 30 | |||||
| output[input==30] = 32 | |||||
| output[input==31] = 32 | |||||
| output[input==32] = 34 | |||||
| output[input==33] = 34 | |||||
| output[input==34] = 34 | |||||
| output[input==35] = 34 | |||||
| return output | |||||
| @staticmethod | |||||
| def backward(ctx, grad_output): | |||||
| return grad_output | |||||
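| # Mapping is a lookup-table autograd Function with an identity (straight- | |||||
| # through) backward pass, so it can be used inside a differentiable graph, | |||||
| # e.g. Mapping.apply(torch.tensor([-1., 0., 3.])) -> tensor([-4., -2., 2.]) | |||||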
| @ -0,0 +1,217 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| import math | |||||
| __all__ = ['resnet'] | |||||
| def conv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def init_model(model): | |||||
| for m in model.modules(): | |||||
| if isinstance(m, nn.Conv2d): | |||||
| n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
| m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
| elif isinstance(m, nn.BatchNorm2d): | |||||
| m.weight.data.fill_(1) | |||||
| m.bias.data.zero_() | |||||
| class BasicBlock(nn.Module): | |||||
| expansion = 1 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
| super(BasicBlock, self).__init__() | |||||
| self.conv1 = conv3x3(inplanes, planes, stride) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.conv2 = conv3x3(planes, planes) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.downsample = downsample | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.relu(out) | |||||
| out = self.conv2(out) | |||||
| out = self.bn2(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| out = self.relu(out) | |||||
| return out | |||||
| class Bottleneck(nn.Module): | |||||
| expansion = 4 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
| super(Bottleneck, self).__init__() | |||||
| self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
| self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.downsample = downsample | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.relu(out) | |||||
| out = self.conv2(out) | |||||
| out = self.bn2(out) | |||||
| out = self.relu(out) | |||||
| out = self.conv3(out) | |||||
| out = self.bn3(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| out = self.relu(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self): | |||||
| super(ResNet, self).__init__() | |||||
| def _make_layer(self, block, planes, blocks, stride=1): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| downsample = nn.Sequential( | |||||
| nn.Conv2d(self.inplanes, planes * block.expansion, | |||||
| kernel_size=1, stride=stride, bias=False), | |||||
| nn.BatchNorm2d(planes * block.expansion), | |||||
| ) | |||||
| layers = [] | |||||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks): | |||||
| layers.append(block(self.inplanes, planes)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = self.conv1(x) | |||||
| x = self.bn1(x) | |||||
| x = self.relu(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.fc(x) | |||||
| return x | |||||
| class ResNet_imagenet(ResNet): | |||||
| def __init__(self, num_classes=1000, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
| super(ResNet_imagenet, self).__init__() | |||||
| self.inplanes = 64 | |||||
| self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
| bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(64) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
| self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
| self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
| self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
| self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
| self.avgpool = nn.AvgPool2d(7) | |||||
| self.fc = nn.Linear(512 * block.expansion, num_classes) | |||||
| init_model(self) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| 30: {'lr': 1e-2}, | |||||
| 60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 90: {'lr': 1e-4} | |||||
| } | |||||
| class ResNet_cifar10(ResNet): | |||||
| def __init__(self, num_classes=10, | |||||
| block=BasicBlock, depth=18): | |||||
| super(ResNet_cifar10, self).__init__() | |||||
| self.inplanes = 16 | |||||
| n = int((depth - 2) / 6) | |||||
| self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, | |||||
| bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(16) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.maxpool = lambda x: x | |||||
| self.layer1 = self._make_layer(block, 16, n) | |||||
| self.layer2 = self._make_layer(block, 32, n, stride=2) | |||||
| self.layer3 = self._make_layer(block, 64, n, stride=2) | |||||
| self.layer4 = lambda x: x | |||||
| self.avgpool = nn.AvgPool2d(8) | |||||
| self.fc = nn.Linear(64, num_classes) | |||||
| init_model(self) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| 81: {'lr': 1e-2}, | |||||
| 122: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 164: {'lr': 1e-4} | |||||
| } | |||||
| def resnet(**kwargs): | |||||
| num_classes, depth, dataset = map( | |||||
| kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
| if dataset == 'imagenet': | |||||
| num_classes = num_classes or 1000 | |||||
| depth = depth or 50 | |||||
| if depth == 18: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
| if depth == 34: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
| if depth == 50: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
| if depth == 101: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
| if depth == 152: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
| elif dataset == 'cifar10': | |||||
| num_classes = num_classes or 10 | |||||
| depth = depth or 18 #56 | |||||
| return ResNet_cifar10(num_classes=num_classes, | |||||
| block=BasicBlock, depth=depth) | |||||
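| # example: resnet(dataset='cifar10', depth=18) builds the 16/32/64-channel | |||||
| # CIFAR-10 variant with n = (depth - 2) // 6 blocks per stage, while | |||||
| # resnet(dataset='imagenet', depth=50) returns the Bottleneck ResNet-50 | |||||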
| @ -0,0 +1,248 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| import math | |||||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| __all__ = ['resnet_binary'] | |||||
| def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def conv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def init_model(model): | |||||
| for m in model.modules(): | |||||
| if isinstance(m, BinarizeConv2d): | |||||
| n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
| m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
| elif isinstance(m, nn.BatchNorm2d): | |||||
| m.weight.data.fill_(1) | |||||
| m.bias.data.zero_() | |||||
| class BasicBlock(nn.Module): | |||||
| expansion = 1 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
| super(BasicBlock, self).__init__() | |||||
| self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.conv2 = Binaryconv3x3(planes, planes) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.downsample = downsample | |||||
| self.do_bntan=do_bntan; | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x.clone() | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.tanh1(out) | |||||
| out = self.conv2(out) | |||||
| if self.downsample is not None: | |||||
| if residual.data.max()>1: | |||||
| import pdb; pdb.set_trace() | |||||
| residual = self.downsample(residual) | |||||
| out += residual | |||||
| if self.do_bntan: | |||||
| out = self.bn2(out) | |||||
| out = self.tanh2(out) | |||||
| return out | |||||
| class Bottleneck(nn.Module): | |||||
| expansion = 4 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
| super(Bottleneck, self).__init__() | |||||
| self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
| self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
| self.tanh = nn.Hardtanh(inplace=True) | |||||
| self.downsample = downsample | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| # import pdb; pdb.set_trace()  # debug breakpoint; would halt every forward pass if active | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.tanh(out) | |||||
| out = self.conv2(out) | |||||
| out = self.bn2(out) | |||||
| out = self.tanh(out) | |||||
| out = self.conv3(out) | |||||
| out = self.bn3(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| # Bottleneck has no do_bntan/tanh2 attributes; apply the shared Hardtanh after the residual add | |||||
| out = self.tanh(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self): | |||||
| super(ResNet, self).__init__() | |||||
| def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| downsample = nn.Sequential( | |||||
| BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
| kernel_size=1, stride=stride, bias=False), | |||||
| nn.BatchNorm2d(planes * block.expansion), | |||||
| ) | |||||
| layers = [] | |||||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks-1): | |||||
| layers.append(block(self.inplanes, planes)) | |||||
| layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = self.conv1(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.bn1(x) | |||||
| x = self.tanh1(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.bn2(x) | |||||
| x = self.tanh2(x) | |||||
| x = self.fc(x) | |||||
| x = self.bn3(x) | |||||
| x = self.logsoftmax(x) | |||||
| return x | |||||
| class ResNet_imagenet(ResNet): | |||||
| def __init__(self, num_classes=1000, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
| super(ResNet_imagenet, self).__init__() | |||||
| self.inplanes = 64 | |||||
| self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
| bias=False) | |||||
| self.bn1 = nn.BatchNorm2d(64) | |||||
| self.tanh = nn.Hardtanh(inplace=True) | |||||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
| self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
| self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
| self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
| self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
| self.avgpool = nn.AvgPool2d(7) | |||||
| self.fc = BinarizeLinear(512 * block.expansion, num_classes) | |||||
| init_model(self) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| 30: {'lr': 1e-2}, | |||||
| 60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 90: {'lr': 1e-4} | |||||
| } | |||||
| class ResNet_cifar10(ResNet): | |||||
| def __init__(self, num_classes=10, | |||||
| block=BasicBlock, depth=18): | |||||
| super(ResNet_cifar10, self).__init__() | |||||
| self.inflate = 5 | |||||
| self.inplanes = 16*self.inflate | |||||
| n = int((depth - 2) / 6) | |||||
| self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
| bias=False) | |||||
| self.maxpool = lambda x: x | |||||
| self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
| self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
| self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
| self.layer4 = lambda x: x | |||||
| self.avgpool = nn.AvgPool2d(8) | |||||
| self.bn2 = nn.BatchNorm1d(64*self.inflate) | |||||
| self.bn3 = nn.BatchNorm1d(10) | |||||
| self.logsoftmax = nn.LogSoftmax() | |||||
| self.fc = BinarizeLinear(64*self.inflate, num_classes) | |||||
| init_model(self) | |||||
| #self.regime = { | |||||
| # 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
| # 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
| # 81: {'lr': 1e-4}, | |||||
| # 122: {'lr': 1e-5, 'weight_decay': 0}, | |||||
| # 164: {'lr': 1e-6} | |||||
| #} | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
| 101: {'lr': 1e-3}, | |||||
| 142: {'lr': 5e-4}, | |||||
| 184: {'lr': 1e-4}, | |||||
| 220: {'lr': 1e-5} | |||||
| } | |||||
| def resnet_binary(**kwargs): | |||||
| num_classes, depth, dataset = map( | |||||
| kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
| if dataset == 'imagenet': | |||||
| num_classes = num_classes or 1000 | |||||
| depth = depth or 50 | |||||
| if depth == 18: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
| if depth == 34: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
| if depth == 50: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
| if depth == 101: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
| if depth == 152: | |||||
| return ResNet_imagenet(num_classes=num_classes, | |||||
| block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
| elif dataset == 'cifar10': | |||||
| num_classes = num_classes or 10 | |||||
| depth = depth or 18 | |||||
| return ResNet_cifar10(num_classes=num_classes, | |||||
| block=BasicBlock, depth=depth) | |||||
| @ -0,0 +1,69 @@ | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| class AlexNetOWT_BN(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(AlexNetOWT_BN, self).__init__() | |||||
| self.features = nn.Sequential( | |||||
| nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, | |||||
| bias=False), | |||||
| nn.BatchNorm2d(128), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(128), | |||||
| nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(256), | |||||
| nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(512), | |||||
| nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.BatchNorm2d(512), | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| nn.Linear(512 * 4 * 4, 1024, bias=False), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(1024, 1024, bias=False), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.ReLU(inplace=True), | |||||
| nn.Dropout(0.5), | |||||
| nn.Linear(1024, num_classes), | |||||
| nn.LogSoftmax() | |||||
| ) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
| 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
| 10: {'lr': 5e-3}, | |||||
| 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
| 20: {'lr': 5e-4}, | |||||
| 25: {'lr': 1e-4} | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 512 * 4 * 4) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def model(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 1000) | |||||
| return AlexNetOWT_BN(num_classes) | |||||
| @ -0,0 +1,80 @@ | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torch.autograd import Function | |||||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| class VGG_Cifar10(nn.Module): | |||||
| def __init__(self, num_classes=1000): | |||||
| super(VGG_Cifar10, self).__init__() | |||||
| self.infl_ratio=3; | |||||
| self.features = nn.Sequential( | |||||
| BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1, | |||||
| bias=True), | |||||
| nn.BatchNorm2d(128*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.BatchNorm2d(128*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.BatchNorm2d(256*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.BatchNorm2d(256*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
| nn.BatchNorm2d(512*self.infl_ratio), | |||||
| nn.Hardtanh(inplace=True), | |||||
| BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True), | |||||
| nn.MaxPool2d(kernel_size=2, stride=2), | |||||
| nn.BatchNorm2d(512), | |||||
| nn.Hardtanh(inplace=True) | |||||
| ) | |||||
| self.classifier = nn.Sequential( | |||||
| BinarizeLinear(512 * 4 * 4, 1024, bias=True), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(1024, 1024, bias=True), | |||||
| nn.BatchNorm1d(1024), | |||||
| nn.Hardtanh(inplace=True), | |||||
| #nn.Dropout(0.5), | |||||
| BinarizeLinear(1024, num_classes, bias=True), | |||||
| nn.BatchNorm1d(num_classes, affine=False), | |||||
| nn.LogSoftmax() | |||||
| ) | |||||
| self.regime = { | |||||
| 0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3}, | |||||
| 40: {'lr': 1e-3}, | |||||
| 80: {'lr': 5e-4}, | |||||
| 100: {'lr': 1e-4}, | |||||
| 120: {'lr': 5e-5}, | |||||
| 140: {'lr': 1e-5} | |||||
| } | |||||
| def forward(self, x): | |||||
| x = self.features(x) | |||||
| x = x.view(-1, 512 * 4 * 4) | |||||
| x = self.classifier(x) | |||||
| return x | |||||
| def vgg_cifar10_binary(**kwargs): | |||||
| num_classes = kwargs.get( 'num_classes', 10) | |||||
| return VGG_Cifar10(num_classes) | |||||
| @ -0,0 +1,198 @@ | |||||
| import torch | |||||
| import torchvision.transforms as transforms | |||||
| import random | |||||
| __imagenet_stats = {'mean': [0.485, 0.456, 0.406], | |||||
| 'std': [0.229, 0.224, 0.225]} | |||||
| __imagenet_pca = { | |||||
| 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), | |||||
| 'eigvec': torch.Tensor([ | |||||
| [-0.5675, 0.7192, 0.4009], | |||||
| [-0.5808, -0.0045, -0.8140], | |||||
| [-0.5836, -0.6948, 0.4203], | |||||
| ]) | |||||
| } | |||||
| def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
| t_list = [ | |||||
| transforms.CenterCrop(input_size), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize), | |||||
| ] | |||||
| if scale_size != input_size: | |||||
| t_list = [transforms.Scale(scale_size)] + t_list | |||||
| return transforms.Compose(t_list) | |||||
| def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
| t_list = [ | |||||
| transforms.RandomCrop(input_size), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize), | |||||
| ] | |||||
| if scale_size != input_size: | |||||
| t_list = [transforms.Scale(scale_size)] + t_list | |||||
| return transforms.Compose(t_list) | |||||
| def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
| padding = int((scale_size - input_size) / 2) | |||||
| return transforms.Compose([ | |||||
| transforms.RandomCrop(input_size, padding=padding), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize), | |||||
| ]) | |||||
| def inception_preproccess(input_size, normalize=__imagenet_stats): | |||||
| return transforms.Compose([ | |||||
| transforms.RandomSizedCrop(input_size), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(**normalize) | |||||
| ]) | |||||
| def inception_color_preproccess(input_size, normalize=__imagenet_stats): | |||||
| return transforms.Compose([ | |||||
| transforms.RandomSizedCrop(input_size), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| ColorJitter( | |||||
| brightness=0.4, | |||||
| contrast=0.4, | |||||
| saturation=0.4, | |||||
| ), | |||||
| Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']), | |||||
| transforms.Normalize(**normalize) | |||||
| ]) | |||||
| def get_transform(name='imagenet', input_size=None, | |||||
| scale_size=None, normalize=None, augment=True): | |||||
| normalize = normalize or __imagenet_stats | |||||
| if name == 'imagenet': | |||||
| scale_size = scale_size or 256 | |||||
| input_size = input_size or 224 | |||||
| if augment: | |||||
| return inception_preproccess(input_size, normalize=normalize) | |||||
| else: | |||||
| return scale_crop(input_size=input_size, | |||||
| scale_size=scale_size, normalize=normalize) | |||||
| elif 'cifar' in name: | |||||
| input_size = input_size or 32 | |||||
| if augment: | |||||
| scale_size = scale_size or 40 | |||||
| return pad_random_crop(input_size, scale_size=scale_size, | |||||
| normalize=normalize) | |||||
| else: | |||||
| scale_size = scale_size or 32 | |||||
| return scale_crop(input_size=input_size, | |||||
| scale_size=scale_size, normalize=normalize) | |||||
| elif name == 'mnist': | |||||
| normalize = {'mean': [0.5], 'std': [0.5]} | |||||
| input_size = input_size or 28 | |||||
| if augment: | |||||
| scale_size = scale_size or 32 | |||||
| return pad_random_crop(input_size, scale_size=scale_size, | |||||
| normalize=normalize) | |||||
| else: | |||||
| scale_size = scale_size or 32 | |||||
| return scale_crop(input_size=input_size, | |||||
| scale_size=scale_size, normalize=normalize) | |||||
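| # example: get_transform('cifar10', augment=True) pads by 4 pixels and | |||||
| # random-crops back to 32x32 with a horizontal flip, while | |||||
| # get_transform('imagenet', augment=False) scales to 256 and center-crops to | |||||
| # 224; both pipelines end in ToTensor + Normalize | |||||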
| class Lighting(object): | |||||
| """Lighting noise(AlexNet - style PCA - based noise)""" | |||||
| def __init__(self, alphastd, eigval, eigvec): | |||||
| self.alphastd = alphastd | |||||
| self.eigval = eigval | |||||
| self.eigvec = eigvec | |||||
| def __call__(self, img): | |||||
| if self.alphastd == 0: | |||||
| return img | |||||
| alpha = img.new().resize_(3).normal_(0, self.alphastd) | |||||
| rgb = self.eigvec.type_as(img).clone()\ | |||||
| .mul(alpha.view(1, 3).expand(3, 3))\ | |||||
| .mul(self.eigval.view(1, 3).expand(3, 3))\ | |||||
| .sum(1).squeeze() | |||||
| return img.add(rgb.view(3, 1, 1).expand_as(img)) | |||||
| class Grayscale(object): | |||||
| def __call__(self, img): | |||||
| gs = img.clone() | |||||
| gs[0].mul_(0.299).add_(gs[1], alpha=0.587).add_(gs[2], alpha=0.114) | |||||
| gs[1].copy_(gs[0]) | |||||
| gs[2].copy_(gs[0]) | |||||
| return gs | |||||
| class Saturation(object): | |||||
| def __init__(self, var): | |||||
| self.var = var | |||||
| def __call__(self, img): | |||||
| gs = Grayscale()(img) | |||||
| alpha = random.uniform(0, self.var) | |||||
| return img.lerp(gs, alpha) | |||||
| class Brightness(object): | |||||
| def __init__(self, var): | |||||
| self.var = var | |||||
| def __call__(self, img): | |||||
| gs = img.new().resize_as_(img).zero_() | |||||
| alpha = random.uniform(0, self.var) | |||||
| return img.lerp(gs, alpha) | |||||
| class Contrast(object): | |||||
| def __init__(self, var): | |||||
| self.var = var | |||||
| def __call__(self, img): | |||||
| gs = Grayscale()(img) | |||||
| gs.fill_(gs.mean()) | |||||
| alpha = random.uniform(0, self.var) | |||||
| return img.lerp(gs, alpha) | |||||
| class RandomOrder(object): | |||||
| """ Composes several transforms together in random order. | |||||
| """ | |||||
| def __init__(self, transforms): | |||||
| self.transforms = transforms | |||||
| def __call__(self, img): | |||||
| if self.transforms is None: | |||||
| return img | |||||
| order = torch.randperm(len(self.transforms)) | |||||
| for i in order: | |||||
| img = self.transforms[i](img) | |||||
| return img | |||||
| class ColorJitter(RandomOrder): | |||||
| def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): | |||||
| self.transforms = [] | |||||
| if brightness != 0: | |||||
| self.transforms.append(Brightness(brightness)) | |||||
| if contrast != 0: | |||||
| self.transforms.append(Contrast(contrast)) | |||||
| if saturation != 0: | |||||
| self.transforms.append(Saturation(saturation)) | |||||
| @ -0,0 +1,5 @@ | |||||
| 2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||||
| 2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
| 2021-04-15 15:36:47 - INFO - creating model alexnet | |||||
| 2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
| 2021-04-15 15:36:48 - INFO - number of parameters: 61110184 | |||||
| @ -0,0 +1,5 @@ | |||||
| 2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||||
| 2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
| 2021-04-15 15:37:52 - INFO - creating model resnet | |||||
| 2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
| 2021-04-15 15:37:52 - INFO - number of parameters: 25557032 | |||||
| @ -0,0 +1,5 @@ | |||||
| 2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||||
| 2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
| 2021-04-15 15:38:16 - INFO - creating model alexnet | |||||
| 2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
| 2021-04-15 15:38:17 - INFO - number of parameters: 61110184 | |||||
| @ -0,0 +1,160 @@ | |||||
| import os | |||||
| import torch | |||||
| import logging.config | |||||
| import shutil | |||||
| import pandas as pd | |||||
| from bokeh.io import output_file, save, show | |||||
| from bokeh.plotting import figure | |||||
| from bokeh.layouts import column | |||||
| #from bokeh.charts import Line, defaults | |||||
| # | |||||
| #defaults.width = 800 | |||||
| #defaults.height = 400 | |||||
| #defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||||
| def setup_logging(log_file='log.txt'): | |||||
| """Setup logging configuration | |||||
| """ | |||||
| logging.basicConfig(level=logging.DEBUG, | |||||
| format="%(asctime)s - %(levelname)s - %(message)s", | |||||
| datefmt="%Y-%m-%d %H:%M:%S", | |||||
| filename=log_file, | |||||
| filemode='w') | |||||
| console = logging.StreamHandler() | |||||
| console.setLevel(logging.INFO) | |||||
| formatter = logging.Formatter('%(message)s') | |||||
| console.setFormatter(formatter) | |||||
| logging.getLogger('').addHandler(console) | |||||
| class ResultsLog(object): | |||||
| def __init__(self, path='results.csv', plot_path=None): | |||||
| self.path = path | |||||
| self.plot_path = plot_path or (self.path + '.html') | |||||
| self.figures = [] | |||||
| self.results = None | |||||
| def add(self, **kwargs): | |||||
| df = pd.DataFrame([kwargs.values()], columns=kwargs.keys()) | |||||
| if self.results is None: | |||||
| self.results = df | |||||
| else: | |||||
| self.results = self.results.append(df, ignore_index=True) | |||||
| def save(self, title='Training Results'): | |||||
| if len(self.figures) > 0: | |||||
| if os.path.isfile(self.plot_path): | |||||
| os.remove(self.plot_path) | |||||
| output_file(self.plot_path, title=title) | |||||
| plot = column(*self.figures) | |||||
| save(plot) | |||||
| self.figures = [] | |||||
| self.results.to_csv(self.path, index=False, index_label=False) | |||||
| def load(self, path=None): | |||||
| path = path or self.path | |||||
| if os.path.isfile(path): | |||||
| self.results = pd.read_csv(path) | |||||
| def show(self): | |||||
| if len(self.figures) > 0: | |||||
| plot = column(*self.figures) | |||||
| show(plot) | |||||
| #def plot(self, *kargs, **kwargs): | |||||
| # line = Line(data=self.results, *kargs, **kwargs) | |||||
| # self.figures.append(line) | |||||
| def image(self, *kargs, **kwargs): | |||||
| fig = figure() | |||||
| fig.image(*kargs, **kwargs) | |||||
| self.figures.append(fig) | |||||
| def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False): | |||||
| filename = os.path.join(path, filename) | |||||
| torch.save(state, filename) | |||||
| if is_best: | |||||
| shutil.copyfile(filename, os.path.join(path, 'model_best.pth.tar')) | |||||
| if save_all: | |||||
| shutil.copyfile(filename, os.path.join( | |||||
| path, 'checkpoint_epoch_%s.pth.tar' % state['epoch'])) | |||||
| class AverageMeter(object): | |||||
| """Computes and stores the average and current value""" | |||||
| def __init__(self): | |||||
| self.reset() | |||||
| def reset(self): | |||||
| self.val = 0 | |||||
| self.avg = 0 | |||||
| self.sum = 0 | |||||
| self.count = 0 | |||||
| def update(self, val, n=1): | |||||
| self.val = val | |||||
| self.sum += val * n | |||||
| self.count += n | |||||
| self.avg = self.sum / self.count | |||||
| __optimizers = { | |||||
| 'SGD': torch.optim.SGD, | |||||
| 'ASGD': torch.optim.ASGD, | |||||
| 'Adam': torch.optim.Adam, | |||||
| 'Adamax': torch.optim.Adamax, | |||||
| 'Adagrad': torch.optim.Adagrad, | |||||
| 'Adadelta': torch.optim.Adadelta, | |||||
| 'Rprop': torch.optim.Rprop, | |||||
| 'RMSprop': torch.optim.RMSprop | |||||
| } | |||||
| def adjust_optimizer(optimizer, epoch, config): | |||||
| """Reconfigures the optimizer according to epoch and config dict""" | |||||
| def modify_optimizer(optimizer, setting): | |||||
| if 'optimizer' in setting: | |||||
| optimizer = __optimizers[setting['optimizer']]( | |||||
| optimizer.param_groups) | |||||
| logging.debug('OPTIMIZER - setting method = %s' % | |||||
| setting['optimizer']) | |||||
| for param_group in optimizer.param_groups: | |||||
| for key in param_group.keys(): | |||||
| if key in setting: | |||||
| logging.debug('OPTIMIZER - setting %s = %s' % | |||||
| (key, setting[key])) | |||||
| param_group[key] = setting[key] | |||||
| return optimizer | |||||
| if callable(config): | |||||
| optimizer = modify_optimizer(optimizer, config(epoch)) | |||||
| else: | |||||
| for e in range(epoch + 1): # run over all epochs - sticky setting | |||||
| if e in config: | |||||
| optimizer = modify_optimizer(optimizer, config[e]) | |||||
| return optimizer | |||||
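| # example of applying a model's `regime` dict (see models/*.py) once per epoch; | |||||
| # settings are sticky, so every entry up to the current epoch is replayed: | |||||
| #   regime = {0: {'optimizer': 'SGD', 'lr': 1e-1}, 30: {'lr': 1e-2}} | |||||
| #   optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||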
| def accuracy(output, target, topk=(1,)): | |||||
| """Computes the precision@k for the specified values of k""" | |||||
| maxk = max(topk) | |||||
| batch_size = target.size(0) | |||||
| _, pred = output.float().topk(maxk, 1, True, True) | |||||
| pred = pred.t() | |||||
| correct = pred.eq(target.view(1, -1).expand_as(pred)) | |||||
| res = [] | |||||
| for k in topk: | |||||
| correct_k = correct[:k].view(-1).float().sum(0) | |||||
| res.append(correct_k.mul_(100.0 / batch_size)) | |||||
| return res | |||||
| # kernel_img = model.features[0][0].kernel.data.clone() | |||||
| # kernel_img.add_(-kernel_img.min()) | |||||
| # kernel_img.mul_(255 / kernel_img.max()) | |||||
| # save_image(kernel_img, 'kernel%s.jpg' % epoch) | |||||
| @ -0,0 +1,154 @@ | |||||
| import torch | |||||
| import numpy as np | |||||
| import cv2, os, sys | |||||
| import pandas as pd | |||||
| from torch.utils.data import Dataset | |||||
| from matplotlib import pyplot as plt | |||||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torchvision.datasets import DatasetFolder | |||||
| from PIL import Image | |||||
| import torchvision.models as models | |||||
| batch_size = 32 | |||||
| num_epoch = 10 | |||||
| train_tfm = transforms.Compose([ | |||||
| transforms.Grayscale(), | |||||
| transforms.RandomResizedCrop((40,30)), | |||||
| transforms.Resize((40, 30)), | |||||
| transforms.ToTensor(), | |||||
| #transforms.TenCrop((40,30)), | |||||
| #transforms.Normalize(0.5,0.5), | |||||
| ]) | |||||
| test_tfm = transforms.Compose([ | |||||
| transforms.Grayscale(), | |||||
| transforms.Resize((40, 30)), | |||||
| transforms.ToTensor() | |||||
| ]) | |||||
| ''' | |||||
| class Classifier(nn.Module): | |||||
| def __init__(self): | |||||
| super(Classifier, self).__init__() | |||||
| self.cnn_layers = nn.Sequential( | |||||
| #input_size(1,30,40) | |||||
| nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||||
| nn.BatchNorm2d(16), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||||
| nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||||
| nn.BatchNorm2d(24), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||||
| nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||||
| nn.BatchNorm2d(32), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||||
| ) | |||||
| self.fc_layers = nn.Sequential( | |||||
| nn.Linear(32 * 2 * 3, 32), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.Linear(32,8) | |||||
| ) | |||||
| def forward(self, x): | |||||
| x = self.cnn_layers(x) | |||||
| x = x.flatten(1) | |||||
| x = self.fc_layers(x) | |||||
| return x | |||||
| ''' | |||||
| def main(): | |||||
| train_set = DatasetFolder("./dataset/data_0705/lepton/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
| test_set = DatasetFolder("./dataset/data_0705/lepton/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
| train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
| test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
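| # torchvision resnet18, adapted below for single-channel (grayscale) input and 3 classes | |||||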
| model = models.resnet18() | |||||
| model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=3, | |||||
| bias=False) | |||||
| model.fc = nn.Linear(512, 3) | |||||
| model = model.to(device) | |||||
| print(model) | |||||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| for epoch in range(num_epoch): | |||||
| ##Training | |||||
| model.train()  # ensure training mode (the model is switched to eval() for testing) | |||||
| running_loss = 0.0 | |||||
| total = 0 | |||||
| correct = 0 | |||||
| for i, data in enumerate(train_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| optimizer.zero_grad() | |||||
| outputs = model(inputs) | |||||
| loss = criterion(outputs, labels) | |||||
| loss.backward() | |||||
| optimizer.step() | |||||
| running_loss += loss.item() | |||||
| total += labels.size(0) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| #print(predicted) | |||||
| #print("label",labels) | |||||
| correct += (predicted == labels).sum().item() | |||||
| train_acc = correct / total | |||||
| print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
| ##Testing | |||||
| model.eval() | |||||
| with torch.no_grad(): | |||||
| correct = 0 | |||||
| total = 0 | |||||
| for i, data in enumerate(test_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| outputs = model(inputs) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| total += labels.size(0) | |||||
| correct += (predicted == labels).sum().item() | |||||
| #print(predicted) | |||||
| #print("labels:",labels) | |||||
| print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @ -0,0 +1,211 @@ | |||||
| import torch | |||||
| import numpy as np | |||||
| import cv2, os, sys | |||||
| import pandas as pd | |||||
| from torch.utils.data import Dataset | |||||
| from matplotlib import pyplot as plt | |||||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torchvision.datasets import DatasetFolder | |||||
| from PIL import Image | |||||
| import torchvision.models | |||||
| import BinaryNetpytorch.models as models | |||||
| from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| batch_size = 32 | |||||
| num_epoch = 10 | |||||
| train_tfm = transforms.Compose([ | |||||
| # transforms.RandomHorizontalFlip(), | |||||
| # transforms.RandomResizedCrop((40,30)), | |||||
| transforms.Grayscale(), | |||||
| transforms.Resize((40, 30)), | |||||
| transforms.ToTensor(), | |||||
| #transforms.RandomResizedCrop((40,30)), | |||||
| #transforms.TenCrop((40,30)), | |||||
| # transforms.Normalize(0.5,0.5), | |||||
| ]) | |||||
| test_tfm = transforms.Compose([ | |||||
| transforms.Grayscale(), | |||||
| transforms.Resize((40, 30)), | |||||
| transforms.ToTensor() | |||||
| ]) | |||||
| def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def conv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| class BasicBlock(nn.Module): | |||||
| expansion = 1 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
| super(BasicBlock, self).__init__() | |||||
| self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.conv2 = Binaryconv3x3(planes, planes) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.downsample = downsample | |||||
| self.do_bntan=do_bntan | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x.clone() | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.tanh1(out) | |||||
| out = self.conv2(out) | |||||
| if self.downsample is not None: | |||||
| if residual.data.max()>1: | |||||
| import pdb; pdb.set_trace() | |||||
| residual = self.downsample(residual) | |||||
| out += residual | |||||
| if self.do_bntan: | |||||
| out = self.bn2(out) | |||||
| out = self.tanh2(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self): | |||||
| super(ResNet, self).__init__() | |||||
| def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| downsample = nn.Sequential( | |||||
| BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
| kernel_size=1, stride=stride, bias=False), | |||||
| nn.BatchNorm2d(planes * block.expansion), | |||||
| ) | |||||
| layers = [] | |||||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks-1): | |||||
| layers.append(block(self.inplanes, planes)) | |||||
| layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = self.conv1(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.bn1(x) | |||||
| x = self.tanh1(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.bn2(x) | |||||
| x = self.tanh2(x) | |||||
| x = self.fc(x) | |||||
| x = self.bn3(x) | |||||
| x = self.logsoftmax(x) | |||||
| return x | |||||
| class ResNet_cifar10(ResNet): | |||||
| def __init__(self, num_classes=3, | |||||
| block=BasicBlock, depth=18): | |||||
| super(ResNet_cifar10, self).__init__() | |||||
| self.inflate = 5 | |||||
| self.inplanes = 16*self.inflate | |||||
| n = int((depth - 2) / 6) | |||||
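| # depth=18 gives n = 2 BasicBlocks per stage; inflate=5 widens the 16/32/64 base channels to 80/160/320 | |||||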
| self.conv1 = BinarizeConv2d(1, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
| bias=False) | |||||
| self.maxpool = lambda x: x | |||||
| self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
| self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
| self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
| self.layer4 = lambda x: x | |||||
| self.avgpool = nn.AvgPool2d(8) | |||||
| self.bn2 = nn.BatchNorm1d(64*self.inflate) | |||||
| self.bn3 = nn.BatchNorm1d(3) | |||||
| self.logsoftmax = nn.LogSoftmax(dim=1) | |||||
| self.fc = BinarizeLinear(64*self.inflate, 3) | |||||
| def main(): | |||||
| train_set = DatasetFolder("pose_data/training/labeled", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
| test_set = DatasetFolder("pose_data/testing", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
| train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
| test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
| model = ResNet_cifar10(num_classes=3,block=BasicBlock,depth=18) | |||||
| model = model.to(device) | |||||
| print(model) | |||||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| for epoch in range(num_epoch): | |||||
| model.train()  # re-enable training mode each epoch (model.eval() is set for testing) | |||||
| running_loss = 0.0 | |||||
| total = 0 | |||||
| correct = 0 | |||||
| for i, data in enumerate(train_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| optimizer.zero_grad() | |||||
| outputs = model(inputs) | |||||
| loss = criterion(outputs, labels) | |||||
| loss.backward() | |||||
| # BinarizeConv2d/BinarizeLinear keep full-precision weights in p.org: restore them | |||||
| # before the update and clamp them to [-1, 1] afterwards, following the same | |||||
| # BinaryNetpytorch convention used in the grid-eye training script. | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| running_loss += loss.item() | |||||
| total += labels.size(0) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| #print(predicted) | |||||
| #print("label",labels) | |||||
| correct += (predicted == labels).sum().item() | |||||
| train_acc = correct / total | |||||
| print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
| model.eval() | |||||
| with torch.no_grad(): | |||||
| correct = 0 | |||||
| total = 0 | |||||
| for i, data in enumerate(test_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| outputs = model(inputs) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| total += labels.size(0) | |||||
| correct += (predicted == labels).sum().item() | |||||
| #print(predicted) | |||||
| #print("labels:",labels) | |||||
| print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @ -0,0 +1,193 @@ | |||||
| import torch | |||||
| import numpy as np | |||||
| import cv2, os, sys | |||||
| import pandas as pd | |||||
| from torch.utils.data import Dataset | |||||
| from matplotlib import pyplot as plt | |||||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torchvision.datasets import DatasetFolder | |||||
| from PIL import Image | |||||
| import torchvision.models as models | |||||
| batch_size = 32 | |||||
| num_epoch = 1 | |||||
| torch.cuda.set_device(1) | |||||
| train_tfm = transforms.Compose([ | |||||
| transforms.Grayscale(), | |||||
| transforms.RandomHorizontalFlip(), | |||||
| transforms.RandomResizedCrop((68,68)), | |||||
| transforms.ToTensor(), | |||||
| #transforms.RandomResizedCrop((40,30)), | |||||
| #transforms.TenCrop((40,30)), | |||||
| #transforms.Normalize(0.5,0.5), | |||||
| ]) | |||||
| test_tfm = transforms.Compose([ | |||||
| transforms.Grayscale(), | |||||
| transforms.ToTensor() | |||||
| ]) | |||||
| ''' | |||||
| class Classifier(nn.Module): | |||||
| def __init__(self): | |||||
| super(Classifier, self).__init__() | |||||
| self.cnn_layers = nn.Sequential( | |||||
| #input_size(1,30,40) | |||||
| nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||||
| nn.BatchNorm2d(16), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||||
| nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||||
| nn.BatchNorm2d(24), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||||
| nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||||
| nn.BatchNorm2d(32), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||||
| ) | |||||
| self.fc_layers = nn.Sequential( | |||||
| nn.Linear(32 * 2 * 3, 32), | |||||
| nn.ReLU(), | |||||
| nn.Dropout(0.2), | |||||
| nn.Linear(32,8) | |||||
| ) | |||||
| def forward(self, x): | |||||
| x = self.cnn_layers(x) | |||||
| x = x.flatten(1) | |||||
| x = self.fc_layers(x) | |||||
| return x | |||||
| ''' | |||||
| def main(): | |||||
| train_set = DatasetFolder("pose_data2/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
| test_set = DatasetFolder("pose_data2/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
| valid_set = DatasetFolder("pose_data2/val", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
| train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
| test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
| valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True) | |||||
| model_path = "model.ckpt" | |||||
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
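| # torchvision resnet50, adapted below for single-channel input and 8 pose classes (fc: 2048 -> 8) | |||||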
| model = models.resnet50() | |||||
| model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, | |||||
| bias=False) | |||||
| model.fc = nn.Linear(2048, 8) | |||||
| model = model.to(device) | |||||
| print(model) | |||||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| best_acc = -1 | |||||
| for epoch in range(num_epoch): | |||||
| ##Training | |||||
| model.train()  # back to training mode after the per-epoch validation below | |||||
| running_loss = 0.0 | |||||
| total = 0 | |||||
| correct = 0 | |||||
| for i, data in enumerate(train_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| optimizer.zero_grad() | |||||
| outputs = model(inputs) | |||||
| loss = criterion(outputs, labels) | |||||
| loss.backward() | |||||
| optimizer.step() | |||||
| running_loss += loss.item() | |||||
| total += labels.size(0) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| #print(predicted) | |||||
| #print("label",labels) | |||||
| correct += (predicted == labels).sum().item() | |||||
| train_acc = correct / total | |||||
| print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
| ##Validation | |||||
| model.eval() | |||||
| valid_loss = 0.0 | |||||
| total = 0 | |||||
| correct = 0 | |||||
| for i, data in enumerate(valid_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| with torch.no_grad(): | |||||
| outputs = model(inputs) | |||||
| loss = criterion(outputs, labels) | |||||
| valid_loss += loss.item() | |||||
| total += labels.size(0) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| correct += (predicted == labels).sum().item() | |||||
| valid_acc = correct / total | |||||
| print(f"[ Valid | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {valid_acc:.5f}") | |||||
| if valid_acc > best_acc: | |||||
| best_acc = valid_acc | |||||
| torch.save(model.state_dict(), model_path) | |||||
| print('saving model with acc {:.3f}'.format(valid_acc)) | |||||
| ##Testing | |||||
| model = models.resnet50() | |||||
| model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, | |||||
| bias=False) | |||||
| model.fc = nn.Linear(2048, 8) | |||||
| model = model.to(device) | |||||
| model.load_state_dict(torch.load(model_path)) | |||||
| model.eval() | |||||
| with torch.no_grad(): | |||||
| correct = 0 | |||||
| total = 0 | |||||
| for i, data in enumerate(test_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| outputs = model(inputs) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| total += labels.size(0) | |||||
| correct += (predicted == labels).sum().item() | |||||
| # for k in range(batch_size): | |||||
| # if predicted[k] != labels[k]: | |||||
| # print(inputs[k]) | |||||
| #print(predicted) | |||||
| #print("labels:",labels) | |||||
| print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @ -0,0 +1,285 @@ | |||||
| import torch | |||||
| import numpy as np | |||||
| import cv2, os, sys | |||||
| import pandas as pd | |||||
| from torch.utils.data import Dataset | |||||
| from matplotlib import pyplot as plt | |||||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torchvision.datasets import DatasetFolder | |||||
| from PIL import Image | |||||
| import torchvision.models | |||||
| import BinaryNetpytorch.models as models | |||||
| from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| import progressbar | |||||
| import seaborn as sns | |||||
| batch_size = 32 | |||||
| num_epoch = 60 | |||||
| torch.cuda.set_device(1) | |||||
| train_tfm = transforms.Compose([ | |||||
| # transforms.RandomHorizontalFlip(), | |||||
| # transforms.RandomResizedCrop((40,30)), | |||||
| transforms.Grayscale(), | |||||
| transforms.Resize((68, 68)), | |||||
| transforms.ToTensor(), | |||||
| #transforms.RandomResizedCrop((40,30)), | |||||
| #transforms.TenCrop((40,30)), | |||||
| # transforms.Normalize(0.5,0.5), | |||||
| ]) | |||||
| test_tfm = transforms.Compose([ | |||||
| transforms.Grayscale(), | |||||
| transforms.Resize((68, 68)), | |||||
| transforms.ToTensor() | |||||
| ]) | |||||
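| # Quantize rounds every pixel to the nearest multiple of 0.0078125 (= 1/128), | |||||
| # emulating a coarse fixed-point representation of the [0, 1] input. | |||||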
| def Quantize(img): | |||||
| scaler = torch.div(img, 0.0078125, rounding_mode="floor") | |||||
| scaler_t1 = scaler * 0.0078125 | |||||
| scaler_t2 = (scaler + 1) * 0.0078125 | |||||
| img = torch.where(abs(img - scaler_t1) < abs(img -scaler_t2), scaler_t1 , scaler_t2) | |||||
| return img | |||||
| # bar = progressbar.ProgressBar(maxval=img.size(0)*img.size(2)*img.size(3), \ | |||||
| # widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) | |||||
| # bar.start() | |||||
| # for p in range(img.size(0)): | |||||
| # for i in range(img.size(2)): | |||||
| # for j in range(img.size(3)): | |||||
| # scaler = int(img[p][0][i][j] / 0.0078125) | |||||
| # t1 = scaler * 0.0078125 | |||||
| # t2 = (scaler + 1) * 0.0078125 | |||||
| # if(abs(img[p][0][i][j] - t1) < abs(img[p][0][i][j] - t2)): | |||||
| # img[p][0][i][j] = t1 | |||||
| # else: | |||||
| # img[p][0][i][j] = t2 | |||||
| # bar.finish() | |||||
| # return img | |||||
| def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| def conv3x3(in_planes, out_planes, stride=1): | |||||
| "3x3 convolution with padding" | |||||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
| padding=1, bias=False) | |||||
| class BasicBlock(nn.Module): | |||||
| expansion = 1 | |||||
| def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
| super(BasicBlock, self).__init__() | |||||
| self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
| self.bn1 = nn.BatchNorm2d(planes) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.conv2 = Binaryconv3x3(planes, planes) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.bn2 = nn.BatchNorm2d(planes) | |||||
| self.downsample = downsample | |||||
| self.do_bntan=do_bntan | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x.clone() | |||||
| x = Quantize(x) | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| out = self.tanh1(out) | |||||
| out = Quantize(out) | |||||
| out = self.conv2(out) | |||||
| if self.downsample is not None: | |||||
| if residual.data.max()>1: | |||||
| import pdb; pdb.set_trace() | |||||
| residual = self.downsample(residual) | |||||
| out += residual | |||||
| if self.do_bntan: | |||||
| out = self.bn2(out) | |||||
| out = self.tanh2(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self): | |||||
| super(ResNet, self).__init__() | |||||
| def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| downsample = nn.Sequential( | |||||
| BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
| kernel_size=1, stride=stride, bias=False), | |||||
| nn.BatchNorm2d(planes * block.expansion), | |||||
| ) | |||||
| layers = [] | |||||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks-1): | |||||
| layers.append(block(self.inplanes, planes)) | |||||
| layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = Quantize(x) | |||||
| x = self.conv1(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.bn1(x) | |||||
| x = self.tanh1(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.bn2(x) | |||||
| x = self.tanh2(x) | |||||
| #print(x.size()) | |||||
| x = x.view(x.size(0), -1, 1, 1)  # don't hardcode batch size 32; the last batch may be smaller | |||||
| x = self.fc(x) | |||||
| x = x.view(x.size(0), -1) | |||||
| x = self.bn3(x) | |||||
| x = self.logsoftmax(x) | |||||
| return x | |||||
| class ResNet_cifar10(ResNet): | |||||
| def __init__(self, num_classes=8, | |||||
| block=BasicBlock, depth=18): | |||||
| super(ResNet_cifar10, self).__init__() | |||||
| self.inflate = 5 | |||||
| self.inplanes = 16*self.inflate | |||||
| n = int((depth - 2) / 6) | |||||
| self.conv1 = BinarizeConv2d(1, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
| bias=False) | |||||
| self.maxpool = lambda x: x | |||||
| self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
| self.tanh1 = nn.Hardtanh(inplace=True) | |||||
| self.tanh2 = nn.Hardtanh(inplace=True) | |||||
| self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
| self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
| self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
| self.layer4 = lambda x: x | |||||
| self.avgpool = nn.AvgPool2d(8) | |||||
| self.bn2 = nn.BatchNorm1d(256*self.inflate) | |||||
| self.bn3 = nn.BatchNorm1d(8) | |||||
| self.logsoftmax = nn.LogSoftmax(dim=1) | |||||
| #self.fc = BinarizeLinear(256*self.inflate, 8) | |||||
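| # classifier head as a 1x1 binarized conv over the flattened 1280-dim feature (reshaped to Nx1280x1x1 in forward) | |||||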
| self.fc = BinarizeConv2d(256*self.inflate, 8, kernel_size=1) | |||||
| def main(): | |||||
| train_set = DatasetFolder("pose_data2/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
| test_set = DatasetFolder("pose_data2/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
| train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
| test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
| model = ResNet_cifar10(num_classes=8,block=BasicBlock,depth=18) | |||||
| model = model.to(device) | |||||
| print(model) | |||||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| model_path = "model.ckpt" | |||||
| for epoch in range(num_epoch): | |||||
| running_loss = 0.0 | |||||
| total = 0 | |||||
| correct = 0 | |||||
| for i, data in enumerate(train_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| optimizer.zero_grad() | |||||
| outputs = model(inputs) | |||||
| loss = criterion(outputs, labels) | |||||
| loss.backward() | |||||
| # BinarizeConv2d/BinarizeLinear keep full-precision weights in p.org: restore them | |||||
| # before the update and clamp them to [-1, 1] afterwards (BinaryNetpytorch convention). | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| running_loss += loss.item() | |||||
| total += labels.size(0) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| #print(predicted) | |||||
| #print("label",labels) | |||||
| correct += (predicted == labels).sum().item() | |||||
| train_acc = correct / total | |||||
| print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
| torch.save(model.state_dict(), model_path) | |||||
| model = ResNet_cifar10(num_classes=8,block=BasicBlock,depth=18) | |||||
| model = model.to(device) | |||||
| model.load_state_dict(torch.load(model_path)) | |||||
| model.eval() | |||||
| with torch.no_grad(): | |||||
| correct = 0 | |||||
| total = 0 | |||||
| correct_2 = 0 | |||||
| stat = np.zeros((8,8)) | |||||
| for i, data in enumerate(test_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| outputs = model(inputs) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| total += labels.size(0) | |||||
| correct += (predicted == labels).sum().item() | |||||
| for b in range(labels.size(0)):  # use the actual batch size; the last batch may be smaller | |||||
| # 2-superclass accuracy: classes 0-3 vs. classes 4-7 | |||||
| if (predicted[b] < 4) == (labels[b] < 4): | |||||
| correct_2 += 1 | |||||
| for k in range(labels.size(0)): | |||||
| if predicted[k] != labels[k]: | |||||
| img = inputs[k].mul(255).byte() | |||||
| img = img.cpu().numpy().squeeze(0) | |||||
| img = np.moveaxis(img, 0, -1) | |||||
| predict = predicted[k].cpu().numpy() | |||||
| label = labels[k].cpu().numpy() | |||||
| path = "test_result/predict:"+str(predict)+"_labels:"+str(label)+".jpg" | |||||
| stat[int(label)][int(predict)] += 1 | |||||
| cv2.imwrite(path,img) | |||||
| print(stat) | |||||
| ax = sns.heatmap(stat, linewidth=0.5) | |||||
| plt.xlabel('Prediction') | |||||
| plt.ylabel('Label') | |||||
| plt.savefig('heatmap.jpg') | |||||
| #print(predicted) | |||||
| #print("labels:",labels) | |||||
| print('Test 2-class Accuracy:{} %'.format((correct_2 / total) * 100)) | |||||
| print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @ -0,0 +1,207 @@ | |||||
| import torch | |||||
| import numpy as np | |||||
| import cv2, os, sys | |||||
| from torch.utils.data import Dataset | |||||
| from matplotlib import pyplot as plt | |||||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
| import torch.nn as nn | |||||
| import torchvision.transforms as transforms | |||||
| from torchvision.datasets import DatasetFolder | |||||
| from PIL import Image | |||||
| from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
| from BinaryNetpytorch.models.binarized_modules import Binarize,HingeLoss | |||||
| import seaborn as sns | |||||
| import random | |||||
| batch_size = 8 | |||||
| num_epoch = 10 | |||||
| seed = 777 | |||||
| torch.manual_seed(seed) | |||||
| torch.cuda.manual_seed(seed) | |||||
| torch.cuda.manual_seed_all(seed) | |||||
| np.random.seed(seed) | |||||
| random.seed(seed) | |||||
| torch.backends.cudnn.benchmark = False | |||||
| torch.backends.cudnn.deterministic = True | |||||
| train_tfm = transforms.Compose([ | |||||
| #transforms.Grayscale(), | |||||
| #transforms.RandomHorizontalFlip(), | |||||
| #transforms.RandomResizedCrop((40,30)), | |||||
| #transforms.RandomCrop((40,30)), | |||||
| #transforms.RandomHorizontalFlip(), | |||||
| transforms.ToTensor(), | |||||
| #transforms.RandomResizedCrop((40,30)), | |||||
| #transforms.TenCrop((40,30)), | |||||
| #transforms.Normalize(0.5,0.5), | |||||
| ]) | |||||
| test_tfm = transforms.Compose([ | |||||
| #transforms.Grayscale(), | |||||
| transforms.ToTensor() | |||||
| ]) | |||||
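| # Small fully-binarized CNN for the grid-eye frames: three BinarizeConv2d+BN+ReLU+MaxPool stages | |||||
| # followed by a BinarizeConv2d head that maps the final feature map to 3 class scores. | |||||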
| class Classifier(nn.Module): | |||||
| def __init__(self): | |||||
| super(Classifier, self).__init__() | |||||
| self.cnn_layers = nn.Sequential( | |||||
| # BinarizeConv2d(in_channels=1, out_channels=128, kernel_size=9, padding=9//2, bias=False), | |||||
| # nn.BatchNorm2d(128), | |||||
| # nn.ReLU(), | |||||
| # BinarizeConv2d(in_channels=128, out_channels=64, kernel_size=1, padding=1//2, bias=False), | |||||
| # nn.BatchNorm2d(64), | |||||
| #input_size (1,40,30), assuming the grid-eye frames are stored at the same 40x30 resolution as the Lepton data | |||||
| BinarizeConv2d(1, 128, 3, 1), #output_size(128,38,28) | |||||
| nn.BatchNorm2d(128), | |||||
| nn.ReLU(), | |||||
| #nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(128,19,14) | |||||
| BinarizeConv2d(128, 64, 3, 1), #output_size(64,17,12) | |||||
| nn.BatchNorm2d(64), | |||||
| nn.ReLU(), | |||||
| #nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(64,8,6) | |||||
| BinarizeConv2d(64, 32, 3, 1), #output_size(32,6,4) | |||||
| nn.BatchNorm2d(32), | |||||
| nn.ReLU(), | |||||
| #nn.Dropout(0.2), | |||||
| nn.MaxPool2d(kernel_size = 2), #output_size(32,3,2) | |||||
| #nn.LogSoftmax(), | |||||
| BinarizeConv2d(32, 3, (3,2), 1) #the (3,2) kernel collapses the 3x2 map to 1x1, giving 3 class scores | |||||
| ) | |||||
| def forward(self, x): | |||||
| x = self.cnn_layers(x) | |||||
| #x = x.flatten(1) | |||||
| #x = self.fc_layers(x) | |||||
| #print(x.shape) | |||||
| x = x.view(x.size(0), -1) | |||||
| #print(x.shape) | |||||
| #x = nn.LogSoftmax(x) | |||||
| #print(x) | |||||
| return x | |||||
| def main(): | |||||
| train_set = DatasetFolder("./dataset/data_0711/grideye/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
| test_set = DatasetFolder("./dataset/data_0711/grideye/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
| val_set = DatasetFolder("./dataset/data_0711/grideye/train", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
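| # note: val_set points at the training folder, so validation accuracy tracks training fit rather than held-out data | |||||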
| train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
| test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
| val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True) | |||||
| save_path = 'models.ckpt' | |||||
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
| model = Classifier().to(device) | |||||
| optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) | |||||
| criterion = nn.CrossEntropyLoss() | |||||
| best_accuracy = 0.0 | |||||
| for epoch in range(num_epoch): | |||||
| model.train()  # back to training mode after the per-epoch validation below | |||||
| running_loss = 0.0 | |||||
| total = 0 | |||||
| correct = 0 | |||||
| for i, data in enumerate(train_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| #print(labels) | |||||
| optimizer.zero_grad() | |||||
| outputs = model(inputs) | |||||
| #print(outputs.shape) | |||||
| loss = criterion(outputs, labels) | |||||
| loss.backward() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.data.copy_(p.org) | |||||
| optimizer.step() | |||||
| for p in list(model.parameters()): | |||||
| if hasattr(p,'org'): | |||||
| p.org.copy_(p.data.clamp_(-1,1)) | |||||
| running_loss += loss.item() | |||||
| total += labels.size(0) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| #print(predicted) | |||||
| #print("label",labels) | |||||
| correct += (predicted == labels).sum().item() | |||||
| train_acc = correct / total | |||||
| print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
| model.eval() | |||||
| with torch.no_grad(): | |||||
| correct = 0 | |||||
| total = 0 | |||||
| for i, data in enumerate(val_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| outputs = model(inputs) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| total += labels.size(0) | |||||
| correct += (predicted == labels).sum().item() | |||||
| val_acc = correct / total | |||||
| if val_acc > best_accuracy: | |||||
| best_accuracy = val_acc | |||||
| torch.save(model.state_dict(), save_path) | |||||
| print("Save Model") | |||||
| print(f"[ Val | {epoch + 1:03d}/{num_epoch:03d} ] acc = {val_acc:.5f}") | |||||
| model = Classifier().to(device) | |||||
| model.load_state_dict(torch.load(save_path)) | |||||
| model.eval() | |||||
| stat = np.zeros((3,3)) | |||||
| with torch.no_grad(): | |||||
| correct = 0 | |||||
| total = 0 | |||||
| print(model) | |||||
| for i, data in enumerate(test_loader): | |||||
| inputs, labels = data | |||||
| inputs = inputs.to(device) | |||||
| labels = labels.to(device) | |||||
| outputs = model(inputs) | |||||
| #print(outputs.data) | |||||
| _,predicted = torch.max(outputs.data,1) | |||||
| #print(predicted) | |||||
| total += labels.size(0) | |||||
| correct += (predicted == labels).sum().item() | |||||
| for k in range(len(predicted)): | |||||
| if predicted[k] != labels[k]: | |||||
| img = inputs[k].mul(255).byte() | |||||
| img = img.cpu().numpy().squeeze(0) | |||||
| img = np.moveaxis(img, 0, -1) | |||||
| predict = predicted[k].cpu().numpy() | |||||
| label = labels[k].cpu().numpy() | |||||
| path = "test_result/predict:"+str(predict)+"_labels:"+str(label)+".jpg" | |||||
| stat[int(label)][int(predict)] += 1 | |||||
| ax = sns.heatmap(stat, linewidth=0.5) | |||||
| plt.xlabel('Prediction') | |||||
| plt.ylabel('Label') | |||||
| plt.savefig('heatmap.jpg') | |||||
| #print(predicted) | |||||
| #print("labels:",labels) | |||||
| print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||