Module 7_yolov3.lib.models
Expand source code
import torch.nn.functional as F
from utils.google_utils import *
from utils.parse_config import *
from utils.utils import *
ONNX_EXPORT = False
def create_modules(module_defs, img_size, arc):
    """Construct an nn.ModuleList of layer blocks from parsed Darknet cfg dicts.

    Args:
        module_defs: list of dicts, one per cfg block; element 0 is the [net]
            hyperparameter block (popped here — NOTE: mutates the caller's list).
        img_size: (height, width) network input size, forwarded to YOLOLayer.
        arc: architecture string selecting the YOLO bias-init scheme
            ('default', 'defaultpw', 'uBCE', 'uCE', 'Fdefault', 'uFBCE', 'uFCE', ...).

    Returns:
        (module_list, routs): the built nn.ModuleList and the indices of layers
        whose outputs must be kept for later route/shortcut layers.
    """
    # Constructs module list of layer blocks from module configuration in module_defs
    hyperparams = module_defs.pop(0)  # [net] block
    output_filters = [int(hyperparams['channels'])]  # running per-layer channel counts, input first
    module_list = nn.ModuleList()
    routs = []  # list of layers which rout to deeper layers
    yolo_index = -1
    for i, mdef in enumerate(module_defs):
        modules = nn.Sequential()
        if mdef['type'] == 'convolutional':
            bn = int(mdef['batch_normalize'])
            filters = int(mdef['filters'])
            size = int(mdef['size'])
            # a single 'stride' or per-axis ('stride_y', 'stride_x') pair is accepted
            stride = int(mdef['stride']) if 'stride' in mdef else (int(mdef['stride_y']), int(mdef['stride_x']))
            pad = (size - 1) // 2 if int(mdef['pad']) else 0
            modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
                                                   out_channels=filters,
                                                   kernel_size=size,
                                                   stride=stride,
                                                   padding=pad,
                                                   groups=int(mdef['groups']) if 'groups' in mdef else 1,
                                                   bias=not bn))  # conv bias is redundant when BN follows
            if bn:
                modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1))
            if mdef['activation'] == 'leaky':  # TODO: activation study https://github.com/ultralytics/yolov3/issues/441
                modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
                # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10))
            elif mdef['activation'] == 'swish':
                modules.add_module('activation', Swish())
        elif mdef['type'] == 'maxpool':
            size = int(mdef['size'])
            stride = int(mdef['stride'])
            maxpool = nn.MaxPool2d(kernel_size=size, stride=stride, padding=int((size - 1) // 2))
            if size == 2 and stride == 1:  # yolov3-tiny
                # asymmetric right/bottom zero-pad keeps output size equal to input for 2x2/s1 pooling
                modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
                modules.add_module('MaxPool2d', maxpool)
            else:
                modules = maxpool
        elif mdef['type'] == 'upsample':
            modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest')
        elif mdef['type'] == 'route':  # nn.Sequential() placeholder for 'route' layer
            layers = [int(x) for x in mdef['layers'].split(',')]
            # channel count of the concatenation; +1 skips the input entry for positive indices
            filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
            # convert negative (relative) layer offsets to absolute indices
            routs.extend([l if l > 0 else l + i for l in layers])
            # if mdef[i+1]['type'] == 'reorg3d':
            #     modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest')  # reorg3d
        elif mdef['type'] == 'shortcut':  # nn.Sequential() placeholder for 'shortcut' layer
            filters = output_filters[int(mdef['from'])]
            layer = int(mdef['from'])
            routs.extend([i + layer if layer < 0 else layer])
        elif mdef['type'] == 'reorg3d':  # yolov3-spp-pan-scale
            # torch.Size([16, 128, 104, 104])
            # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer
            pass
        elif mdef['type'] == 'yolo':
            yolo_index += 1
            mask = [int(x) for x in mdef['mask'].split(',')]  # anchor mask
            modules = YOLOLayer(anchors=mdef['anchors'][mask],  # anchor list
                                nc=int(mdef['classes']),  # number of classes
                                img_size=img_size,  # (416, 416)
                                yolo_index=yolo_index,  # 0, 1 or 2
                                arc=arc)  # yolo architecture
            # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
            try:
                if arc == 'defaultpw' or arc == 'Fdefaultpw':  # default with positive weights
                    b = [-5.0, -5.0]  # obj, cls
                elif arc == 'default':  # default no pw (40 cls, 80 obj)
                    b = [-5.0, -5.0]
                elif arc == 'uBCE':  # unified BCE (80 classes)
                    b = [0, -9.0]
                elif arc == 'uCE':  # unified CE (1 background + 80 classes)
                    b = [10, -0.1]
                elif arc == 'Fdefault':  # Focal default no pw (28 cls, 21 obj, no pw)
                    b = [-2.1, -1.8]
                elif arc == 'uFBCE' or arc == 'uFBCEpw':  # unified FocalBCE (5120 obj, 80 classes)
                    b = [0, -6.5]
                elif arc == 'uFCE':  # unified FocalCE (64 cls, 1 background + 80 classes)
                    b = [7.7, -1.1]
                # NOTE: if arc matches none of the branches above, 'b' is undefined and
                # the resulting NameError is swallowed by the bare except below.
                bias = module_list[-1][0].bias.view(len(mask), -1)  # 255 to 3x85
                bias[:, 4] += b[0] - bias[:, 4].mean()  # obj
                bias[:, 5:] += b[1] - bias[:, 5:].mean()  # cls
                # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index]  # list of tensors [3x85, 3x85, 3x85]
                module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1))
                # utils.print_model_biases(model)
            except:
                print('WARNING: smart bias initialization failure.')
        else:
            print('Warning: Unrecognized Layer Type: ' + mdef['type'])
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)
    return module_list, routs
class SwishImplementation(torch.autograd.Function):
    """Memory-efficient Swish autograd Function.

    Saves only the input tensor and recomputes sigmoid in backward instead of
    keeping the activation output alive, reducing training memory use.
    """

    @staticmethod
    def forward(ctx, i):
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        # fix: ctx.saved_variables was deprecated and removed from PyTorch;
        # ctx.saved_tensors is the supported accessor.
        i, = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)
        # d/di [i * sigmoid(i)] = sigmoid(i) * (1 + i * (1 - sigmoid(i)))
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
class MemoryEfficientSwish(nn.Module):
    """Swish activation (x * sigmoid(x)) that delegates to a custom autograd
    Function, trading a little backward compute for lower memory use."""

    def forward(self, x):
        # The Function recomputes sigmoid(x) during backward rather than
        # storing the forward activation.
        return SwishImplementation.apply(x)
class Swish(nn.Module):
    """Swish activation: f(x) = x * sigmoid(x)."""

    def forward(self, x):
        # fix: the previous `x.mul_(torch.sigmoid(x))` mutated the input
        # tensor in place, silently corrupting the caller's tensor and
        # raising in autograd when x requires grad. Use the out-of-place
        # product instead; the result is numerically identical.
        return x * torch.sigmoid(x)
class Mish(nn.Module):  # https://github.com/digantamisra98/Mish
    """Mish activation: f(x) = x * tanh(softplus(x))."""

    def forward(self, x):
        # fix: the previous `x.mul_(...)` mutated the input tensor in place,
        # corrupting the caller's tensor and breaking autograd when x
        # requires grad. Use the out-of-place product instead.
        return x * F.softplus(x).tanh()
class YOLOLayer(nn.Module):
    """YOLO detection head for one scale.

    Decodes the preceding conv feature map into (x, y, w, h, obj, cls)
    predictions. `arc` selects how objectness/class scores are activated at
    inference time ('default*', '*BCE*', '*CE*').
    """

    def __init__(self, anchors, nc, img_size, yolo_index, arc):
        super(YOLOLayer, self).__init__()
        self.anchors = torch.Tensor(anchors)
        self.na = len(anchors)  # number of anchors (3)
        self.nc = nc  # number of classes (80)
        self.no = nc + 5  # number of outputs per anchor (xywh + obj + classes)
        self.nx = 0  # initialize number of x gridpoints
        self.ny = 0  # initialize number of y gridpoints
        self.arc = arc

        if ONNX_EXPORT:  # grids must be computed in __init__ (no dynamic shapes in ONNX)
            stride = [32, 16, 8][yolo_index]  # stride of this layer
            nx = int(img_size[1] / stride)  # number x grid points
            ny = int(img_size[0] / stride)  # number y grid points
            create_grids(self, img_size, (nx, ny))

    def forward(self, p, img_size, var=None):
        """Decode raw conv output `p` (bs, na*no, ny, nx).

        Returns the reshaped raw tensor in training mode; (p_cls, xy, wh) in
        ONNX export mode; (io, p) at inference, where io is
        (bs, na*ny*nx, no) with boxes in input-image pixels.
        """
        if ONNX_EXPORT:
            bs = 1  # batch size
        else:
            bs, _, ny, nx = p.shape  # bs, 255, 13, 13
            if (self.nx, self.ny) != (nx, ny):
                # grid size changed (e.g. multi-scale training) -> rebuild cached grids
                create_grids(self, img_size, (nx, ny), p.device, p.dtype)

        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
        p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        if self.training:
            return p

        elif ONNX_EXPORT:
            # Constants CAN NOT BE BROADCAST, ensure correct shape!
            m = self.na * self.nx * self.ny
            grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view(m, 2)
            anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view(m, 2) / self.ng

            p = p.view(m, self.no)
            xy = torch.sigmoid(p[:, 0:2]) + grid_xy  # x, y
            wh = torch.exp(p[:, 2:4]) * anchor_wh  # width, height
            p_cls = torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5])  # conf
            return p_cls, xy / self.ng, wh

        else:  # inference
            # s = 1.5  # scale_xy  (pxy = pxy * s - (s - 1) / 2)
            io = p.clone()  # inference output (p itself is returned unmodified)
            io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid_xy  # xy
            io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method
            # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh  # wh power method
            io[..., :4] *= self.stride  # grid units -> input-image pixels

            if 'default' in self.arc:  # seperate obj and cls
                torch.sigmoid_(io[..., 4:])
            elif 'BCE' in self.arc:  # unified BCE (80 classes)
                torch.sigmoid_(io[..., 5:])
                io[..., 4] = 1  # objectness forced to 1; conf comes from class scores
            elif 'CE' in self.arc:  # unified CE (1 background + 80 classes)
                io[..., 4:] = F.softmax(io[..., 4:], dim=4)
                io[..., 4] = 1

            if self.nc == 1:
                io[..., 5] = 1  # single-class model https://github.com/ultralytics/yolov3/issues/235

            # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
            return io.view(bs, -1, self.no), p
class Darknet(nn.Module):
    """YOLOv3 object detection model built from a Darknet *.cfg definition."""

    def __init__(self, cfg, img_size=(416, 416), arc='default'):
        super(Darknet, self).__init__()
        self.module_defs = parse_model_cfg(cfg)
        self.module_list, self.routs = create_modules(self.module_defs, img_size, arc)
        self.yolo_layers = get_yolo_layers(self)

        # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
        self.version = np.array([0, 2, 5], dtype=np.int32)  # (int32) version info: major, minor, revision
        self.seen = np.array([0], dtype=np.int64)  # (int64) number of images seen during training

    def forward(self, x, var=None):
        """Run the network. Training mode returns the raw per-scale yolo
        outputs; eval mode returns (concatenated detections, raw outputs)."""
        img_size = x.shape[-2:]
        layer_outputs = []
        output = []

        for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)):
            mtype = mdef['type']
            if mtype in ['convolutional', 'upsample', 'maxpool']:
                x = module(x)
            elif mtype == 'route':
                # NOTE: the comprehension variable briefly shadows the tensor x
                layers = [int(x) for x in mdef['layers'].split(',')]
                if len(layers) == 1:
                    x = layer_outputs[layers[0]]
                else:
                    try:
                        x = torch.cat([layer_outputs[i] for i in layers], 1)
                    except:  # apply stride 2 for darknet reorg layer
                        # spatial mismatch: downscale the second input, then retry the concat
                        layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5])
                        x = torch.cat([layer_outputs[i] for i in layers], 1)
                    # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape)
            elif mtype == 'shortcut':
                x = x + layer_outputs[int(mdef['from'])]
            elif mtype == 'yolo':
                output.append(module(x, img_size))
            # keep only outputs that later route/shortcut layers will read
            layer_outputs.append(x if i in self.routs else [])

        if self.training:
            return output
        elif ONNX_EXPORT:
            x = [torch.cat(x, 0) for x in zip(*output)]
            return x[0], torch.cat(x[1:3], 1)  # scores, boxes: 3780x80, 3780x4
        else:
            io, p = list(zip(*output))  # inference output, training output
            return torch.cat(io, 1), p

    def fuse(self):
        # Fuse Conv2d + BatchNorm2d layers throughout model (inference speedup;
        # invalidates Darknet-format saving, see save_weights)
        fused_list = nn.ModuleList()
        for a in list(self.children())[0]:  # iterate self.module_list
            if isinstance(a, nn.Sequential):
                for i, b in enumerate(a):
                    if isinstance(b, nn.modules.batchnorm.BatchNorm2d):
                        # fuse this bn layer with the previous conv2d layer
                        conv = a[i - 1]
                        fused = torch_utils.fuse_conv_and_bn(conv, b)
                        a = nn.Sequential(fused, *list(a.children())[i + 1:])
                        break
            fused_list.append(a)
        self.module_list = fused_list
        # model_info(self)  # yolov3-spp reduced from 225 to 152 layers
def get_yolo_layers(model):
    """Return the indices of 'yolo' blocks in model.module_defs
    ([82, 94, 106] for yolov3)."""
    indices = []
    for idx, block in enumerate(model.module_defs):
        if block['type'] == 'yolo':
            indices.append(idx)
    return indices
def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32):
    """Cache decoding constants for a YOLO layer on `self`.

    Stores img_size, stride, grid_xy (1, 1, ny, nx, 2) cell offsets,
    anchor_vec / anchor_wh (anchors in grid units), ng, nx and ny.
    `type` shadows the builtin but is kept for caller compatibility.
    """
    nx, ny = ng  # x and y grid size
    self.nx = nx
    self.ny = ny
    self.ng = torch.Tensor(ng).to(device)
    self.img_size = max(img_size)
    self.stride = self.img_size / max(ng)  # pixels per grid cell

    # per-cell (x, y) integer offsets
    yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
    cell_offsets = torch.stack((xv, yv), 2)
    self.grid_xy = cell_offsets.to(device).type(type).view((1, 1, ny, nx, 2))

    # anchors rescaled from pixels to grid (stride) units
    self.anchor_vec = self.anchors.to(device) / self.stride
    self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type)
def load_darknet_weights(self, weights, cutoff=-1):
    """Parse a Darknet *.weights file and copy its values into the model.

    Args:
        self: Darknet model (reads .module_defs/.module_list, writes .version/.seen).
        weights: path to a Darknet-format weights file.
        cutoff: load layers [0:cutoff]; known partial backbones
            (darknet53.conv.74, yolov3-tiny.conv.15) set it automatically.
            NOTE(review): the default -1 slices off the final module; harmless
            for standard cfgs whose last block is a weightless [yolo] layer.
    """
    # Establish cutoffs for known partial-backbone files
    file = Path(weights).name
    if file == 'darknet53.conv.74':
        cutoff = 75
    elif file == 'yolov3-tiny.conv.15':
        cutoff = 15

    # Read weights file
    with open(weights, 'rb') as f:
        # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
        self.version = np.fromfile(f, dtype=np.int32, count=3)  # (int32) version info: major, minor, revision
        self.seen = np.fromfile(f, dtype=np.int64, count=1)  # (int64) number of images seen during training
        weights = np.fromfile(f, dtype=np.float32)  # the rest are weights

    ptr = 0

    def _copy(param, ptr):
        # Copy the next param.numel() floats from the buffer into `param`;
        # return the advanced offset.
        n = param.numel()
        param.data.copy_(torch.from_numpy(weights[ptr:ptr + n]).view_as(param))
        return ptr + n

    for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
        if mdef['type'] == 'convolutional':
            conv_layer = module[0]
            if mdef['batch_normalize']:
                # Darknet stores BN as: bias, weight, running_mean, running_var
                bn_layer = module[1]
                ptr = _copy(bn_layer.bias, ptr)
                ptr = _copy(bn_layer.weight, ptr)
                ptr = _copy(bn_layer.running_mean, ptr)
                ptr = _copy(bn_layer.running_var, ptr)
            else:
                # No BN: the conv layer has its own bias
                ptr = _copy(conv_layer.bias, ptr)
            # Conv weights always follow
            ptr = _copy(conv_layer.weight, ptr)
def save_weights(self, path='model.weights', cutoff=-1):
    """Serialize the model to Darknet format (*.pt to *.weights).

    Inverse of load_darknet_weights. Note: does not work after model.fuse()
    has merged Conv2d+BatchNorm2d pairs.
    """
    with open(path, 'wb') as f:
        # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
        self.version.tofile(f)  # (int32) version info: major, minor, revision
        self.seen.tofile(f)  # (int64) number of images seen during training

        # Walk the layers, emitting parameters only for convolutional blocks
        for mdef, module in zip(self.module_defs[:cutoff], self.module_list[:cutoff]):
            if mdef['type'] != 'convolutional':
                continue
            conv_layer = module[0]
            if mdef['batch_normalize']:
                # Darknet order: BN bias, weight, running_mean, running_var
                bn_layer = module[1]
                for tensor in (bn_layer.bias, bn_layer.weight,
                               bn_layer.running_mean, bn_layer.running_var):
                    tensor.data.cpu().numpy().tofile(f)
            else:
                conv_layer.bias.data.cpu().numpy().tofile(f)
            # Conv weights always follow
            conv_layer.weight.data.cpu().numpy().tofile(f)
def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'):
    """Convert between PyTorch and Darknet formats, chosen by file extension
    (*.weights converts to *.pt and vice versa).

    Usage: from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights')
    """
    # Build an empty model from the cfg, then fill it from `weights`
    model = Darknet(cfg)

    if weights.endswith('.pt'):  # PyTorch checkpoint -> Darknet weights
        model.load_state_dict(torch.load(weights, map_location='cpu')['model'])
        save_weights(model, path='converted.weights', cutoff=-1)
        print("Success: converted '%s' to 'converted.weights'" % weights)
    elif weights.endswith('.weights'):  # Darknet weights -> PyTorch checkpoint
        _ = load_darknet_weights(model, weights)
        chkpt = {'epoch': -1,
                 'best_fitness': None,
                 'training_results': None,
                 'model': model.state_dict(),
                 'optimizer': None}
        torch.save(chkpt, 'converted.pt')
        print("Success: converted '%s' to 'converted.pt'" % weights)
    else:
        print('Error: extension not supported.')
def attempt_download(weights):
    """Download pretrained weights to `weights` if the file is missing locally.

    Known filenames are fetched from Google Drive by id; anything else is
    tried from pjreddie.com via curl. Raises Exception when the download
    fails or the resulting file is smaller than 1 MB.
    """
    msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0'

    if weights and not os.path.isfile(weights):
        d = {'yolov3-spp.weights': '16lYS4bcIdM2HdmyJBVDOvt3Trx6N3W2R',
             'yolov3.weights': '1uTlyDWlnaqXcsKOktP5aH_zRDbfcDp-y',
             'yolov3-tiny.weights': '1CCF-iNIIkYesIDzaPvdwlcf7H9zSsKZQ',
             'yolov3-spp.pt': '1f6Ovy3BSq2wYq4UfvFUpxJFNDFfrIDcR',
             'yolov3.pt': '1SHNFyoe5Ni8DajDNEqgB2oVKBb_NoEad',
             'yolov3-tiny.pt': '10m_3MlpQwRtZetQxtksm9jqHrPTHZ6vo',
             'darknet53.conv.74': '1WUVBid-XuoUBmvzBVUCBl_ELrzqwA8dJ',
             'yolov3-tiny.conv.15': '1Bw0kCpplxUqyRYAJr9RY9SGnOJbo9nEj',
             'ultralytics49.pt': '158g62Vs14E3aj7oPVPuEnNZMKFNgGyNq',
             'ultralytics68.pt': '1Jm8kqnMdMGUUxGo8zMFZMJ0eaPwLkxSG'}

        file = Path(weights).name
        if file in d:
            r = gdrive_download(id=d[file], name=weights)
        else:  # download from pjreddie.com
            url = 'https://pjreddie.com/media/files/' + file
            print('Downloading ' + url)
            # NOTE(review): shell-string command; `file` derives from the local
            # path, not remote input, but quoting would still be safer.
            r = os.system('curl -f ' + url + ' -o ' + weights)

        # Error check: require curl/gdrive success and a plausible (>1MB) file
        if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6):
            # fix: replaced `os.system('rm ' + weights)` with os.remove —
            # portable (works on Windows) and immune to spaces/shell
            # metacharacters in the path; same net effect of discarding
            # partial downloads.
            if os.path.exists(weights):
                os.remove(weights)  # remove partial downloads
            raise Exception(msg)
Functions
def attempt_download(weights)
-
Expand source code
def attempt_download(weights): # Attempt to download pretrained weights if not found locally msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0' if weights and not os.path.isfile(weights): d = {'yolov3-spp.weights': '16lYS4bcIdM2HdmyJBVDOvt3Trx6N3W2R', 'yolov3.weights': '1uTlyDWlnaqXcsKOktP5aH_zRDbfcDp-y', 'yolov3-tiny.weights': '1CCF-iNIIkYesIDzaPvdwlcf7H9zSsKZQ', 'yolov3-spp.pt': '1f6Ovy3BSq2wYq4UfvFUpxJFNDFfrIDcR', 'yolov3.pt': '1SHNFyoe5Ni8DajDNEqgB2oVKBb_NoEad', 'yolov3-tiny.pt': '10m_3MlpQwRtZetQxtksm9jqHrPTHZ6vo', 'darknet53.conv.74': '1WUVBid-XuoUBmvzBVUCBl_ELrzqwA8dJ', 'yolov3-tiny.conv.15': '1Bw0kCpplxUqyRYAJr9RY9SGnOJbo9nEj', 'ultralytics49.pt': '158g62Vs14E3aj7oPVPuEnNZMKFNgGyNq', 'ultralytics68.pt': '1Jm8kqnMdMGUUxGo8zMFZMJ0eaPwLkxSG'} file = Path(weights).name if file in d: r = gdrive_download(id=d[file], name=weights) else: # download from pjreddie.com url = 'https://pjreddie.com/media/files/' + file print('Downloading ' + url) r = os.system('curl -f ' + url + ' -o ' + weights) # Error check if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB os.system('rm ' + weights) # remove partial downloads raise Exception(msg)
def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights')
-
Expand source code
def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'): # Converts between PyTorch and Darknet format per extension (i.e. *.weights convert to *.pt and vice versa) # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') # Initialize model model = Darknet(cfg) # Load weights and save if weights.endswith('.pt'): # if PyTorch format model.load_state_dict(torch.load(weights, map_location='cpu')['model']) save_weights(model, path='converted.weights', cutoff=-1) print("Success: converted '%s' to 'converted.weights'" % weights) elif weights.endswith('.weights'): # darknet format _ = load_darknet_weights(model, weights) chkpt = {'epoch': -1, 'best_fitness': None, 'training_results': None, 'model': model.state_dict(), 'optimizer': None} torch.save(chkpt, 'converted.pt') print("Success: converted '%s' to 'converted.pt'" % weights) else: print('Error: extension not supported.')
def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32)
-
Expand source code
def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32): nx, ny = ng # x and y grid size self.img_size = max(img_size) self.stride = self.img_size / max(ng) # build xy offsets yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) self.grid_xy = torch.stack((xv, yv), 2).to(device).type(type).view((1, 1, ny, nx, 2)) # build wh gains self.anchor_vec = self.anchors.to(device) / self.stride self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type) self.ng = torch.Tensor(ng).to(device) self.nx = nx self.ny = ny
def create_modules(module_defs, img_size, arc)
-
Expand source code
def create_modules(module_defs, img_size, arc): # Constructs module list of layer blocks from module configuration in module_defs hyperparams = module_defs.pop(0) output_filters = [int(hyperparams['channels'])] module_list = nn.ModuleList() routs = [] # list of layers which rout to deeper layers yolo_index = -1 for i, mdef in enumerate(module_defs): modules = nn.Sequential() if mdef['type'] == 'convolutional': bn = int(mdef['batch_normalize']) filters = int(mdef['filters']) size = int(mdef['size']) stride = int(mdef['stride']) if 'stride' in mdef else (int(mdef['stride_y']), int(mdef['stride_x'])) pad = (size - 1) // 2 if int(mdef['pad']) else 0 modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], out_channels=filters, kernel_size=size, stride=stride, padding=pad, groups=int(mdef['groups']) if 'groups' in mdef else 1, bias=not bn)) if bn: modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1)) if mdef['activation'] == 'leaky': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10)) elif mdef['activation'] == 'swish': modules.add_module('activation', Swish()) elif mdef['type'] == 'maxpool': size = int(mdef['size']) stride = int(mdef['stride']) maxpool = nn.MaxPool2d(kernel_size=size, stride=stride, padding=int((size - 1) // 2)) if size == 2 and stride == 1: # yolov3-tiny modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) modules.add_module('MaxPool2d', maxpool) else: modules = maxpool elif mdef['type'] == 'upsample': modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest') elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer layers = [int(x) for x in mdef['layers'].split(',')] filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers]) routs.extend([l if l > 0 else l + i for l in layers]) # if mdef[i+1]['type'] == 
'reorg3d': # modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest') # reorg3d elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer filters = output_filters[int(mdef['from'])] layer = int(mdef['from']) routs.extend([i + layer if layer < 0 else layer]) elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale # torch.Size([16, 128, 104, 104]) # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer pass elif mdef['type'] == 'yolo': yolo_index += 1 mask = [int(x) for x in mdef['mask'].split(',')] # anchor mask modules = YOLOLayer(anchors=mdef['anchors'][mask], # anchor list nc=int(mdef['classes']), # number of classes img_size=img_size, # (416, 416) yolo_index=yolo_index, # 0, 1 or 2 arc=arc) # yolo architecture # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) try: if arc == 'defaultpw' or arc == 'Fdefaultpw': # default with positive weights b = [-5.0, -5.0] # obj, cls elif arc == 'default': # default no pw (40 cls, 80 obj) b = [-5.0, -5.0] elif arc == 'uBCE': # unified BCE (80 classes) b = [0, -9.0] elif arc == 'uCE': # unified CE (1 background + 80 classes) b = [10, -0.1] elif arc == 'Fdefault': # Focal default no pw (28 cls, 21 obj, no pw) b = [-2.1, -1.8] elif arc == 'uFBCE' or arc == 'uFBCEpw': # unified FocalBCE (5120 obj, 80 classes) b = [0, -6.5] elif arc == 'uFCE': # unified FocalCE (64 cls, 1 background + 80 classes) b = [7.7, -1.1] bias = module_list[-1][0].bias.view(len(mask), -1) # 255 to 3x85 bias[:, 4] += b[0] - bias[:, 4].mean() # obj bias[:, 5:] += b[1] - bias[:, 5:].mean() # cls # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index] # list of tensors [3x85, 3x85, 3x85] module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1)) # utils.print_model_biases(model) except: print('WARNING: smart bias initialization failure.') else: print('Warning: Unrecognized Layer Type: ' + mdef['type']) # Register module list 
and number of output filters module_list.append(modules) output_filters.append(filters) return module_list, routs
def get_yolo_layers(model)
-
Expand source code
def get_yolo_layers(model): return [i for i, x in enumerate(model.module_defs) if x['type'] == 'yolo'] # [82, 94, 106] for yolov3
def load_darknet_weights(self, weights, cutoff=-1)
-
Expand source code
def load_darknet_weights(self, weights, cutoff=-1): # Parses and loads the weights stored in 'weights' # Establish cutoffs (load layers between 0 and cutoff. if cutoff = -1 all are loaded) file = Path(weights).name if file == 'darknet53.conv.74': cutoff = 75 elif file == 'yolov3-tiny.conv.15': cutoff = 15 # Read weights file with open(weights, 'rb') as f: # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training weights = np.fromfile(f, dtype=np.float32) # the rest are weights ptr = 0 for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): if mdef['type'] == 'convolutional': conv_layer = module[0] if mdef['batch_normalize']: # Load BN bias, weights, running mean and running variance bn_layer = module[1] num_b = bn_layer.bias.numel() # Number of biases # Bias bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) bn_layer.bias.data.copy_(bn_b) ptr += num_b # Weight bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) bn_layer.weight.data.copy_(bn_w) ptr += num_b # Running Mean bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) bn_layer.running_mean.data.copy_(bn_rm) ptr += num_b # Running Var bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) bn_layer.running_var.data.copy_(bn_rv) ptr += num_b else: # Load conv. bias num_b = conv_layer.bias.numel() conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) conv_layer.bias.data.copy_(conv_b) ptr += num_b # Load conv. weights num_w = conv_layer.weight.numel() conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) conv_layer.weight.data.copy_(conv_w) ptr += num_w
def save_weights(self, path='model.weights', cutoff=-1)
-
Expand source code
def save_weights(self, path='model.weights', cutoff=-1): # Converts a PyTorch model to Darket format (*.pt to *.weights) # Note: Does not work if model.fuse() is applied with open(path, 'wb') as f: # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 self.version.tofile(f) # (int32) version info: major, minor, revision self.seen.tofile(f) # (int64) number of images seen during training # Iterate through layers for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): if mdef['type'] == 'convolutional': conv_layer = module[0] # If batch norm, load bn first if mdef['batch_normalize']: bn_layer = module[1] bn_layer.bias.data.cpu().numpy().tofile(f) bn_layer.weight.data.cpu().numpy().tofile(f) bn_layer.running_mean.data.cpu().numpy().tofile(f) bn_layer.running_var.data.cpu().numpy().tofile(f) # Load conv bias else: conv_layer.bias.data.cpu().numpy().tofile(f) # Load conv weights conv_layer.weight.data.cpu().numpy().tofile(f)
Classes
class Darknet (cfg, img_size=(416, 416), arc='default')
-
Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes::
import torch.nn as nn import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) def forward(self, x): x = F.relu(self.conv1(x)) return F.relu(self.conv2(x))
Submodules assigned in this way will be registered, and will have their parameters converted too when you call :meth:
to
, etc.Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class Darknet(nn.Module): # YOLOv3 object detection model def __init__(self, cfg, img_size=(416, 416), arc='default'): super(Darknet, self).__init__() self.module_defs = parse_model_cfg(cfg) self.module_list, self.routs = create_modules(self.module_defs, img_size, arc) self.yolo_layers = get_yolo_layers(self) # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training def forward(self, x, var=None): img_size = x.shape[-2:] layer_outputs = [] output = [] for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)): mtype = mdef['type'] if mtype in ['convolutional', 'upsample', 'maxpool']: x = module(x) elif mtype == 'route': layers = [int(x) for x in mdef['layers'].split(',')] if len(layers) == 1: x = layer_outputs[layers[0]] else: try: x = torch.cat([layer_outputs[i] for i in layers], 1) except: # apply stride 2 for darknet reorg layer layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5]) x = torch.cat([layer_outputs[i] for i in layers], 1) # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape) elif mtype == 'shortcut': x = x + layer_outputs[int(mdef['from'])] elif mtype == 'yolo': output.append(module(x, img_size)) layer_outputs.append(x if i in self.routs else []) if self.training: return output elif ONNX_EXPORT: x = [torch.cat(x, 0) for x in zip(*output)] return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4 else: io, p = list(zip(*output)) # inference output, training output return torch.cat(io, 1), p def fuse(self): # Fuse Conv2d + BatchNorm2d layers throughout model fused_list = nn.ModuleList() for a in list(self.children())[0]: if isinstance(a, nn.Sequential): for i, b in enumerate(a): if isinstance(b, nn.modules.batchnorm.BatchNorm2d): # fuse this bn layer 
with the previous conv2d layer conv = a[i - 1] fused = torch_utils.fuse_conv_and_bn(conv, b) a = nn.Sequential(fused, *list(a.children())[i + 1:]) break fused_list.append(a) self.module_list = fused_list
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, x, var=None)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the :class:
Module
instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.Expand source code
def forward(self, x, var=None): img_size = x.shape[-2:] layer_outputs = [] output = [] for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)): mtype = mdef['type'] if mtype in ['convolutional', 'upsample', 'maxpool']: x = module(x) elif mtype == 'route': layers = [int(x) for x in mdef['layers'].split(',')] if len(layers) == 1: x = layer_outputs[layers[0]] else: try: x = torch.cat([layer_outputs[i] for i in layers], 1) except: # apply stride 2 for darknet reorg layer layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5]) x = torch.cat([layer_outputs[i] for i in layers], 1) # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape) elif mtype == 'shortcut': x = x + layer_outputs[int(mdef['from'])] elif mtype == 'yolo': output.append(module(x, img_size)) layer_outputs.append(x if i in self.routs else []) if self.training: return output elif ONNX_EXPORT: x = [torch.cat(x, 0) for x in zip(*output)] return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4 else: io, p = list(zip(*output)) # inference output, training output return torch.cat(io, 1), p
def fuse(self)
-
Expand source code
def fuse(self): # Fuse Conv2d + BatchNorm2d layers throughout model fused_list = nn.ModuleList() for a in list(self.children())[0]: if isinstance(a, nn.Sequential): for i, b in enumerate(a): if isinstance(b, nn.modules.batchnorm.BatchNorm2d): # fuse this bn layer with the previous conv2d layer conv = a[i - 1] fused = torch_utils.fuse_conv_and_bn(conv, b) a = nn.Sequential(fused, *list(a.children())[i + 1:]) break fused_list.append(a) self.module_list = fused_list
class MemoryEfficientSwish
-
Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes::
import torch.nn as nn import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) def forward(self, x): x = F.relu(self.conv1(x)) return F.relu(self.conv2(x))
Submodules assigned in this way will be registered, and will have their parameters converted too when you call :meth:
to
, etc.Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class MemoryEfficientSwish(nn.Module): def forward(self, x): return SwishImplementation.apply(x)
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, x)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for the forward pass needs to be defined within this function, one should call the :class:`Module` instance afterwards instead of this, since the former takes care of running the registered hooks while the latter silently ignores them.
Expand source code
def forward(self, x):
    # Delegate to the SwishImplementation autograd Function: x * sigmoid(x).
    return SwishImplementation.apply(x)
class Mish
-
Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes::
import torch.nn as nn import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) def forward(self, x): x = F.relu(self.conv1(x)) return F.relu(self.conv2(x))
Submodules assigned in this way will be registered, and will have their parameters converted too when you call :meth:
to
, etc.Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class Mish(nn.Module):  # https://github.com/digantamisra98/Mish
    """Mish activation: x * tanh(softplus(x))."""

    def forward(self, x):
        """Apply Mish element-wise and return a new tensor.

        Fix: the original used the in-place ``x.mul_(...)``, which mutates
        the caller's input tensor and raises/corrupts gradients under
        autograd when ``x`` requires grad.
        """
        return x * F.softplus(x).tanh()
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, x)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the :class:
Module
instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.Expand source code
def forward(self, x):
    """Mish activation, x * tanh(softplus(x)).

    Fix: out-of-place multiply — the original in-place ``mul_`` mutated the
    input tensor and breaks autograd when ``x`` requires grad.
    """
    return x * F.softplus(x).tanh()
class Swish
-
Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes::
import torch.nn as nn import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) def forward(self, x): x = F.relu(self.conv1(x)) return F.relu(self.conv2(x))
Submodules assigned in this way will be registered, and will have their parameters converted too when you call :meth:
to
, etc.Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class Swish(nn.Module):
    """Swish activation: x * sigmoid(x)."""

    def forward(self, x):
        """Apply swish element-wise and return a new tensor.

        Fix: the original used in-place ``x.mul_(...)``, which mutates the
        caller's input and breaks autograd when ``x`` requires grad.
        """
        return x * torch.sigmoid(x)
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, x)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the :class:
Module
instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.Expand source code
def forward(self, x):
    """Swish activation, x * sigmoid(x), computed out-of-place.

    Fix: the in-place ``mul_`` mutated the input tensor and breaks autograd
    when ``x`` requires grad.
    """
    return x * torch.sigmoid(x)
class SwishImplementation (...)
-
Records operation history and defines formulas for differentiating ops.
Every operation performed on :class:`Tensor`\ s creates a new function object, that performs the computation, and records that it happened. The history is retained in the form of a DAG of functions, with edges denoting data dependencies (input <- output). Then, when backward is called, the graph is processed in the topological ordering, by calling :func:`backward` methods of each :class:`Function` object, and passing returned gradients on to next :class:`Function`\ s.

Normally, the only way users interact with functions is by creating subclasses and defining new operations. This is a recommended way of extending torch.autograd.
Each function object is meant to be used only once (in the forward pass).
Examples::
>>> class Exp(Function): >>> >>> @staticmethod >>> def forward(ctx, i): >>> result = i.exp() >>> ctx.save_for_backward(result) >>> return result >>> >>> @staticmethod >>> def backward(ctx, grad_output): >>> result, = ctx.saved_tensors >>> return grad_output * result
Expand source code
class SwishImplementation(torch.autograd.Function):
    """Autograd Function for swish, x * sigmoid(x).

    Saves only the input tensor in forward and recomputes sigmoid in
    backward, trading a little compute for lower activation memory.
    """

    @staticmethod
    def forward(ctx, i):
        """Return i * sigmoid(i); stash i for the backward pass."""
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        """Swish gradient: grad_output * sigmoid(i) * (1 + i * (1 - sigmoid(i))).

        Fix: read the saved input via ``ctx.saved_tensors`` —
        ``ctx.saved_variables`` is deprecated and removed in modern PyTorch.
        """
        i, = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)  # recompute once instead of twice
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
Ancestors
- torch.autograd.function.Function
- torch._C._FunctionBase
- torch.autograd.function._ContextMethodMixin
- torch.autograd.function._HookMixin
Static methods
def backward(ctx, grad_output)
-
Defines a formula for differentiating the operation.
This function is to be overridden by all subclasses.
It must accept a context :attr:`ctx` as the first argument, followed by as many outputs as :func:`forward` returned, and it should return as many tensors as there were inputs to :func:`forward`. Each argument is the gradient w.r.t. the given output, and each returned value should be the gradient w.r.t. the corresponding input.

The context can be used to retrieve tensors saved during the forward pass. It also has an attribute :attr:`ctx.needs_input_grad` as a tuple of booleans representing whether each input needs gradient. E.g., :func:`backward` will have ``ctx.needs_input_grad[0] = True`` if the first input to :func:`forward` needs gradient computed w.r.t. the output.
Expand source code
@staticmethod def backward(ctx, grad_output): sigmoid_i = torch.sigmoid(ctx.saved_variables[0]) return grad_output * (sigmoid_i * (1 + ctx.saved_variables[0] * (1 - sigmoid_i)))
def forward(ctx, i)
-
Performs the operation.
This function is to be overridden by all subclasses.
It must accept a context ctx as the first argument, followed by any number of arguments (tensors or other types).
The context can be used to store tensors that can be then retrieved during the backward pass.
Expand source code
@staticmethod def forward(ctx, i): ctx.save_for_backward(i) return i * torch.sigmoid(i)
class YOLOLayer (anchors, nc, img_size, yolo_index, arc)
-
Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes::
import torch.nn as nn import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) def forward(self, x): x = F.relu(self.conv1(x)) return F.relu(self.conv2(x))
Submodules assigned in this way will be registered, and will have their parameters converted too when you call :meth:
to
, etc.Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class YOLOLayer(nn.Module):
    """YOLO detection head.

    Decodes a raw conv feature map into per-anchor box (xywh), objectness
    and class predictions for one detection scale.  Relies on the
    module-level helper `create_grids` to populate grid_xy / anchor_wh /
    stride / ng on self — assumed defined elsewhere in this file (TODO
    confirm against the full source).
    """

    def __init__(self, anchors, nc, img_size, yolo_index, arc):
        super(YOLOLayer, self).__init__()
        self.anchors = torch.Tensor(anchors)
        self.na = len(anchors)  # number of anchors (3)
        self.nc = nc  # number of classes (80)
        self.no = nc + 5  # number of outputs per anchor: xywh + obj + classes
        self.nx = 0  # initialize number of x gridpoints
        self.ny = 0  # initialize number of y gridpoints
        self.arc = arc  # architecture flag, matched by substring in forward()

        if ONNX_EXPORT:  # grids must be computed in __init__
            stride = [32, 16, 8][yolo_index]  # stride of this layer
            nx = int(img_size[1] / stride)  # number x grid points
            ny = int(img_size[0] / stride)  # number y grid points
            create_grids(self, img_size, (nx, ny))

    def forward(self, p, img_size, var=None):
        # `var` is unused; kept for interface compatibility.
        if ONNX_EXPORT:
            bs = 1  # batch size
        else:
            bs, _, ny, nx = p.shape  # bs, 255, 13, 13
            if (self.nx, self.ny) != (nx, ny):
                # feature-map size changed (multi-scale training): rebuild grids
                create_grids(self, img_size, (nx, ny), p.device, p.dtype)

        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
        p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        if self.training:
            # training returns raw predictions; decoding happens in the loss
            return p

        elif ONNX_EXPORT:
            # Constants CAN NOT BE BROADCAST, ensure correct shape!
            m = self.na * self.nx * self.ny  # total predictions at this scale
            grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view(m, 2)
            anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view(m, 2) / self.ng

            p = p.view(m, self.no)
            xy = torch.sigmoid(p[:, 0:2]) + grid_xy  # x, y
            wh = torch.exp(p[:, 2:4]) * anchor_wh  # width, height
            p_cls = torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5])  # conf
            return p_cls, xy / self.ng, wh

        else:  # inference
            # s = 1.5  # scale_xy (pxy = pxy * s - (s - 1) / 2)
            io = p.clone()  # inference output
            io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid_xy  # xy
            io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method
            # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh  # wh power method
            io[..., :4] *= self.stride  # grid units -> input-image pixels

            if 'default' in self.arc:  # seperate obj and cls
                torch.sigmoid_(io[..., 4:])
            elif 'BCE' in self.arc:  # unified BCE (80 classes)
                torch.sigmoid_(io[..., 5:])
                io[..., 4] = 1
            elif 'CE' in self.arc:  # unified CE (1 background + 80 classes)
                io[..., 4:] = F.softmax(io[..., 4:], dim=4)
                io[..., 4] = 1

            if self.nc == 1:
                io[..., 5] = 1  # single-class model https://github.com/ultralytics/yolov3/issues/235

            # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
            return io.view(bs, -1, self.no), p
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, p, img_size, var=None)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the :class:
Module
instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.Expand source code
def forward(self, p, img_size, var=None):
    # Decode raw conv output `p` into YOLO predictions for one scale.
    # `var` is unused; kept for interface compatibility.
    if ONNX_EXPORT:
        bs = 1  # batch size
    else:
        bs, _, ny, nx = p.shape  # bs, 255, 13, 13
        if (self.nx, self.ny) != (nx, ny):
            # feature-map size changed (multi-scale training): rebuild grids
            create_grids(self, img_size, (nx, ny), p.device, p.dtype)

    # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
    p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous()  # prediction

    if self.training:
        # training returns raw predictions; decoding happens in the loss
        return p

    elif ONNX_EXPORT:
        # Constants CAN NOT BE BROADCAST, ensure correct shape!
        m = self.na * self.nx * self.ny  # total predictions at this scale
        grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view(m, 2)
        anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view(m, 2) / self.ng

        p = p.view(m, self.no)
        xy = torch.sigmoid(p[:, 0:2]) + grid_xy  # x, y
        wh = torch.exp(p[:, 2:4]) * anchor_wh  # width, height
        p_cls = torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5])  # conf
        return p_cls, xy / self.ng, wh

    else:  # inference
        # s = 1.5  # scale_xy (pxy = pxy * s - (s - 1) / 2)
        io = p.clone()  # inference output
        io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid_xy  # xy
        io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method
        # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh  # wh power method
        io[..., :4] *= self.stride  # grid units -> input-image pixels

        if 'default' in self.arc:  # seperate obj and cls
            torch.sigmoid_(io[..., 4:])
        elif 'BCE' in self.arc:  # unified BCE (80 classes)
            torch.sigmoid_(io[..., 5:])
            io[..., 4] = 1
        elif 'CE' in self.arc:  # unified CE (1 background + 80 classes)
            io[..., 4:] = F.softmax(io[..., 4:], dim=4)
            io[..., 4] = 1

        if self.nc == 1:
            io[..., 5] = 1  # single-class model https://github.com/ultralytics/yolov3/issues/235

        # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
        return io.view(bs, -1, self.no), p