Author | 郁振波
Source | https://zhuanlan.zhihu.com/p/7792356
Editor | 深度学习这件小事 (WeChat account)
Writing (packing the whole dataset into a single HDF5 file):
import os
import h5py

# h5py stores strings as bytes, so encode the relative image paths first
imagenametotal_.append(os.path.join('images', imagenametotal).encode())

# Open in write mode and dump everything into one HDF5 file
with h5py.File(outfile, 'w') as f:
    f.create_dataset('imagename', data=imagenametotal_)
    f['part'] = parts_   # 2D keypoints
    f['S'] = Ss_         # 3D keypoints
    f['image'] = cvimgs  # pre-decoded image arrays
Reading:
import numpy as np

with h5py.File(outfile, 'r') as f:
    imagename = [x.decode() for x in f['imagename']]
    kp2ds = np.array(f['part'])    # 2D keypoints
    kp3ds = np.array(f['S'])       # 3D keypoints
    cvimgs = np.array(f['image'])
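The payoff of packing everything into one HDF5 file is that the Dataset can then serve samples from in-memory arrays instead of touching the filesystem for every image. A minimal sketch of such a Dataset, assuming the file fits in RAM; the class name H5Dataset and the __getitem__ layout are illustrative, not from the original:

import h5py
import numpy as np
from torch.utils.data import Dataset

class H5Dataset(Dataset):
    def __init__(self, outfile):
        # Load all arrays once; afterwards no disk I/O happens per sample
        with h5py.File(outfile, 'r') as f:
            self.imagename = [x.decode() for x in f['imagename']]
            self.kp2ds = np.array(f['part'])
            self.kp3ds = np.array(f['S'])
            self.images = np.array(f['image'])

    def __len__(self):
        return self.images.shape[0]

    def __getitem__(self, idx):
        return self.images[idx], self.kp2ds[idx], self.kp3ds[idx]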
# Balanced multi-GPU training: DataParallelModel keeps each output on its
# own GPU, and DataParallelCriterion computes the loss there, so GPU 0 no
# longer collects every output and runs out of memory first
from balanced_parallel import DataParallelModel, DataParallelCriterion
model = DataParallelModel(model, device_ids=gpus).cuda()
criterion = DataParallelCriterion(loss_fn(), device_ids=gpus).cuda()

# At evaluation time, gather the per-GPU outputs back onto one device
from torch.nn.parallel.scatter_gather import gather
preds = gather(preds, 0)
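A sketch of what a training step looks like under this setup; the exact argument handling depends on the balanced_parallel implementation, and the optimizer and loader names are assumed:

for imgs, targets in train_loader:
    preds = model(imgs.cuda())        # list with one output chunk per GPU
    loss = criterion(preds, targets)  # the parallel criterion scatters targets
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()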
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel

torch.distributed.init_process_group(backend="nccl")

# Bind each process to its own GPU (on a single node, the global rank
# doubles as the local GPU index)
local_rank = torch.distributed.get_rank()
torch.cuda.set_device(local_rank)
device = torch.device("cuda", local_rank)

# Move the model to its GPU before wrapping it
model.to(device)
model = DistributedDataParallel(model, device_ids=[local_rank],
                                output_device=local_rank)

# Add a distributed sampler on top of the existing dataloader
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    sampler=DistributedSampler(train_dataset),
)
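This code assumes one process per GPU, with the rendezvous environment variables set by the launcher. On the PyTorch versions this article targets, that meant torch.distributed.launch (superseded by torchrun in newer releases); train.py is a placeholder script name:

python -m torch.distributed.launch --nproc_per_node=4 train.py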
import torch.backends.cudnn as cudnn

# Let cuDNN benchmark convolution algorithms and cache the fastest one;
# this helps when input sizes stay fixed, at the cost of determinism
cudnn.benchmark = True
cudnn.deterministic = False
cudnn.enabled = True
# Hand cached, unused GPU memory back to the driver
torch.cuda.empty_cache()
# Drop the Python reference to a tensor you no longer need
del xxx  # xxx: the variable name
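These two go together: del drops the reference so the caching allocator can reclaim the blocks, and empty_cache() then returns them to the driver. A minimal sketch of the pattern, with a hypothetical tensor name:

big_feats = torch.randn(256, 512, 512, device="cuda")  # hypothetical tensor
# ... use big_feats ...
del big_feats             # release the Python reference first
torch.cuda.empty_cache()  # then return the cached blocks to the driver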
def __len__(self):
    # Dataset length = first dimension of the pre-loaded image array
    return self.images.shape[0]
# pin_memory=True allocates batches in page-locked host memory,
# which makes host-to-GPU copies faster
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    pin_memory=True,
)
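Pinned memory pays off mainly when the copy to the GPU is made asynchronous; the usual companion pattern (variable names assumed) is:

for imgs, labels in train_loader:
    # non_blocking=True overlaps the pinned-memory copy with computation
    imgs = imgs.cuda(non_blocking=True)
    labels = labels.cuda(non_blocking=True)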
Further reading: 在深度学习中喂饱GPU (Feeding the GPU in deep learning)
https://zhuanlan.zhihu.com/p/77633542