Caffe在构造TrainNet时,首先要做的便是data和label的输入。数据层(Data Layer)位于Caffe网络的最底层,数据可以从高效的数据库(LevelDB、LMDB)中读取,也可以直接从内存中读取,或者直接从硬盘上的文件中读取。
1. 数据层 LevelDB 和 LMDB
层类型为Data,主要分为LevelDB 和 LMDB
其在*.prototxt文件中如下所示:
# Data layer that reads (data, label) pairs from an LMDB database; only
# included in the TRAIN phase.
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625  # 1/256, multiplied into every input value
  }
  # batch_size and backend are fields of data_param, so they must sit
  # inside its braces.
  data_param {
    source: "example/mnist/mnist_train.lmdb"
    batch_size: 64
    backend: LMDB
  }
}
这里给出一种用pycaffe生成上述数据层的代码:
# Declare the LMDB-backed Data layer; ntop=2 makes the call return the two
# tops that are bound to n.data and n.label.
# NOTE(review): phase 0 is presumably TRAIN, matching the prototxt -- confirm.
n.data, n.label = L.Data(
    name='MNIST',
    source="example/mnist/mnist_train.lmdb",
    batch_size=64,
    include=dict(phase=0),
    backend=P.Data.LMDB,
    ntop=2,
    transform_param={'scale': 0.00390625},
)
2. 内存数据
# MemoryData layer: batches are supplied directly from memory, so the blob
# geometry (batch_size x channels x height x width) is declared up front.
layer {
  name: "memory_data"
  type: "MemoryData"
  top: "data"
  top: "label"
  memory_data_param {
    batch_size: 2
    height: 100
    width: 100
    channels: 1
  }
  transform_param {
    scale: 0.0078125  # 1/128
    mean_file: "mean.proto"
    mirror: false
  }
}
3. HDF5数据
# HDF5Data layer producing the "data" and "label" tops in batches of 10.
# NOTE(review): HDF5Data normally expects `source` to be a text file listing
# .h5 file paths; this LMDB path looks copied from the Data example -- confirm.
layer {
  name: "data"
  type: "HDF5Data"
  top: "data"
  top: "label"
  hdf5_data_param {
    source: "example/mnist/mnist_train.lmdb"
    batch_size: 10
  }
}
pycaffe接口:
# Declare the HDF5Data layer; ntop=2 makes the call return the two tops that
# are bound to n.data and n.label.
n.data, n.label = L.HDF5Data(
    name='data',
    source="example/mnist/mnist_train.lmdb",
    batch_size=10,
    ntop=2,
)
4. 图像数据Images
# ImageData layer: reads images from disk, resizes them to
# new_height x new_width, then applies the transform (crop, mean subtraction).
# NOTE(review): ImageData normally expects `source` to be a text file of
# "<image path> <label>" lines; this LMDB path looks copied from the Data
# example -- confirm.
layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  image_data_param {
    source: "example/mnist/mnist_train.lmdb"
    batch_size: 50
    new_height: 256
    new_width: 256
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "mean.proto"
  }
}
事实上,Caffe也可以自己来定义数据输入层,比如FCN代码中的Python Layer形式,通过继承caffe.Layer并实现其setup、reshape、forward、backward等函数:
我直接把FCN中的Python Layer放上来,这里仅仅作为示例:
import random

import caffe
import numpy as np
from PIL import Image


class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises:
            Exception: if the layer is not wired with exactly two tops and
                zero bottoms.
        """
        # config
        # SECURITY: param_str is evaluated as Python source. Only use this
        # layer with net definitions you trust.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels; the context manager closes the
        # file handle instead of leaking it
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current pair and resize both tops to fit it."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops and advance the index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0  # wrap around to the first index

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer does nothing in the backward pass."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.

        The leading singleton dimension is required by the loss.
        """
        im = Image.open(
            '{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with
        SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises:
            Exception: if the layer is not wired with exactly two tops and
                zero bottoms.
        """
        # config
        # SECURITY: param_str is evaluated as Python source. Only use this
        # layer with net definitions you trust.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels; the context manager closes the
        # file handle instead of leaking it
        split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current pair and resize both tops to fit it."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops and advance the index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0  # wrap around to the first index

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer does nothing in the backward pass."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.

        The leading singleton dimension is required by the loss.
        """
        # Imported lazily, as in the original listing, so scipy is only
        # needed when SBDD .mat labels are actually read.
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label
在Pycaffe的Net生成中:
# Declare the Python data layer: `module` names the Python module that defines
# the layer class and `layer` the class to use; ntop=2 makes the call return
# the two tops that are bound to n.data and n.label.
n.data, n.label = L.Python(
    module='voc_layers',
    layer=pylayer,
    ntop=2,
    param_str=str(pydata_params),
)
Python Layer还是比较好用的,可以随心所欲的定义自己的输入。还是要向大牛前辈学习一个