Source code for dataset

# -*- coding: utf-8 -*-
# @Time    : 2018/8/23 22:18
# @Author  : zhoujun
import pathlib
import numpy as np
from mxnet import image, nd, recordio
import cv2
from mxnet.gluon.data import Dataset, RecordFileDataset


class ImageDataset(Dataset):
    def __init__(self, data_txt: str, data_shape: tuple, img_channel: int, num_label: int, alphabet: str,
                 phase: str = 'train'):
        """
        Dataset initialization
        :param data_txt: text file listing image paths and their corresponding labels
        :param data_shape: image size (h, w)
        :param img_channel: number of image channels
        :param num_label: maximum number of characters; should match the width of the network's final output sequence
        :param alphabet: alphabet
        :param phase: 'train' or 'test'
        """
        super(ImageDataset, self).__init__()
        assert phase in ['train', 'test']

        self.data_list = []
        with open(data_txt, 'r', encoding='utf-8') as f:
            for line in f.readlines():
                line = line.strip('\n').replace('.jpg ', '.jpg\t').split('\t')
                img_path = pathlib.Path(line[0])
                # keep only samples whose image exists, is non-empty, and has a label
                if img_path.exists() and img_path.stat().st_size > 0 and line[1]:
                    self.data_list.append((line[0], line[1]))
        self.img_h = data_shape[0]
        self.img_w = data_shape[1]
        self.img_channel = img_channel
        self.num_label = num_label
        self.alphabet = alphabet
        self.phase = phase
        self.label_dict = {}
        for i, char in enumerate(self.alphabet):
            self.label_dict[char] = i

    def __getitem__(self, idx):
        img_path, label = self.data_list[idx]
        label = label.replace(' ', '')
        try:
            label = self.label_enocder(label)
        except Exception as e:
            # log the offending sample; the raw label string is returned unchanged in this case
            print(img_path, label)
        img = self.pre_processing(img_path)
        return img, label

    def __len__(self):
        return len(self.data_list)

    def label_enocder(self, label):
        """
        Encode a label: convert the input label string into indices in the alphabet
        :param label: label string
        :return: index vector of length num_label, padded with -1
        """
        tmp_label = nd.zeros(self.num_label, dtype=np.float32) - 1
        for i, ch in enumerate(label):
            tmp_label[i] = self.label_dict[ch]
        return tmp_label

    def pre_processing(self, img_path):
        """
        Pre-process an image: resize to the target height first; if the resulting width is smaller
        than the target width, pad with black pixels, otherwise force-resize to the target width
        :param img_path: path of the image
        :return: processed image as an NDArray
        """
        data_augment = False
        if self.phase == 'train' and np.random.rand() > 0.5:
            data_augment = True
        if data_augment:
            # resize onto a slightly larger canvas, then random-crop back to (img_w, img_h)
            img_h = 40
            img_w = 340
        else:
            img_h = self.img_h
            img_w = self.img_w
        with open(img_path, 'rb') as f:
            img = image.imdecode(f.read(), 1 if self.img_channel == 3 else 0)
        h, w = img.shape[:2]
        ratio_h = float(img_h) / h
        new_w = int(w * ratio_h)
        if new_w < img_w:
            img = image.imresize(img, w=new_w, h=img_h)
            step = nd.zeros((img_h, img_w - new_w, self.img_channel), dtype=img.dtype)
            img = nd.concat(img, step, dim=1)
        else:
            img = image.imresize(img, w=img_w, h=img_h)
        if data_augment:
            img, _ = image.random_crop(img, (self.img_w, self.img_h))
        return img
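
# Hedged sketch (not part of the original module): `decode_label` is a hypothetical helper
# illustrating the inverse of `label_enocder` above -- it stops at the -1 padding and maps the
# remaining indices back to characters of the alphabet. It assumes the vector was produced by
# `ImageDataset.label_enocder`.
def decode_label(encoded_label, alphabet):
    """Map a padded index vector back to its label string (illustrative only)."""
    chars = []
    for idx in encoded_label.asnumpy().astype(int):
        if idx < 0:  # -1 marks padding
            break
        chars.append(alphabet[idx])
    return ''.join(chars)
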
class RecordDataset(RecordFileDataset):
    """
    A dataset wrapping over a RecordIO file containing images.
    Each sample is an image and its corresponding label.
    """

    def __init__(self, filename, data_shape: tuple, img_channel: int, num_label: int):
        super(RecordDataset, self).__init__(filename)
        self.data_shape = data_shape
        self.img_channel = img_channel
        self.num_label = num_label

    def __getitem__(self, idx):
        record = super(RecordDataset, self).__getitem__(idx)
        header, img = recordio.unpack(record)
        img = self.pre_processing(img)
        label = self.label_enocder(header.label)
        return img, label

    def label_enocder(self, label):
        """
        Encode a label: the record header already stores the label as alphabet indices,
        so only pad it with -1 up to a fixed length
        :param label: label indices from the record header
        :return: index vector of length num_label
        """
        label = nd.array(label)
        tmp_label = nd.zeros(self.num_label - len(label), dtype=np.float32) - 1
        label = nd.concat(label, tmp_label, dim=0)
        return label

    def pre_processing(self, img):
        """
        Pre-process an image: resize to the target height; if the resulting width is smaller
        than the target width, pad with black pixels, otherwise force-resize to the target width
        :param img: encoded image bytes from the record
        :return: processed image as an NDArray
        """
        img = image.imdecode(img, 1 if self.img_channel == 3 else 0)
        h, w = img.shape[:2]
        ratio_h = float(self.data_shape[0]) / h
        new_w = int(w * ratio_h)
        if new_w < self.data_shape[1]:
            img = image.imresize(img, w=new_w, h=self.data_shape[0])
            step = nd.zeros((self.data_shape[0], self.data_shape[1] - new_w, self.img_channel), dtype=img.dtype)
            img = nd.concat(img, step, dim=1)
        else:
            img = image.imresize(img, w=self.data_shape[1], h=self.data_shape[0])
        return img
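
# Hedged sketch (not part of the original module): one way the RecordIO file consumed by
# `RecordDataset` might be built. `write_recordfile` is a hypothetical helper; it assumes each
# sample is an (image path, label string) pair and stores the label as alphabet indices in the
# record header, which matches how `__getitem__` reads `header.label` back above.
def write_recordfile(samples, rec_path, idx_path, alphabet):
    """Pack (img_path, label) pairs into a .rec/.idx pair (illustrative only)."""
    label_dict = {char: i for i, char in enumerate(alphabet)}
    record = recordio.MXIndexedRecordIO(idx_path, rec_path, 'w')
    for i, (img_path, label) in enumerate(samples):
        label_idx = [float(label_dict[ch]) for ch in label]
        # flag is recomputed by recordio.pack when the label is an array
        header = recordio.IRHeader(flag=0, label=label_idx, id=i, id2=0)
        with open(img_path, 'rb') as f:
            packed = recordio.pack(header, f.read())
        record.write_idx(i, packed)
    record.close()
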
if __name__ == '__main__':
    import keys
    import time
    from mxnet.gluon.data import DataLoader
    from matplotlib import pyplot as plt
    from matplotlib.font_manager import FontProperties
    from mxnet.gluon.data.vision.transforms import ToTensor
    from predict import decode

    font = FontProperties(fname=r"simsun.ttc", size=14)

    alphabet = keys.txt_alphabet
    dataset = ImageDataset('/data/zhy/crnn/Chinese_character/train2.txt', (32, 320), 3, 81, alphabet)
    # dataset = RecordDataset('/data1/zj/data/crnn/txt/val.rec', (32, 320), 3, 81)
    data_loader = DataLoader(dataset.transform_first(ToTensor()), 128, shuffle=True, num_workers=12)
    all_step = len(dataset) // 128
    start = time.time()
    for i, (img, label) in enumerate(data_loader):
        cur_step = 0 * all_step + i
        if (i + 1) % 100 == 0:
            print(time.time() - start)
            start = time.time()
        print(cur_step)
        # start = time.time()
        # print(label.shape)
        # result = decode(label.asnumpy(), alphabet)
        # img1 = img[0].asnumpy().transpose(1, 2, 0)
        # print(result[0])
        # plt.title(result[0], FontProperties=font)
        # plt.imshow(img1)
        # plt.show()
        # break