A Large Scale Fish Dataset 图像分割与识别

下面这些为实验课上的任务要求和说明。

数据集介绍:

此数据集包含从土耳其伊兹密尔的一家超市收集的9种不同的海鲜类型,数据集包括镀金头鲷、红鲷、鲈鱼、红鲻鱼、竹荚鱼、黑海鲱鱼,条纹红鲻鱼,鳟鱼,虾图像样本。(label分别为gilt head bream, red sea bream, sea bass, red mullet, horse mackerel, black sea sprat, striped red mullet, trout, shrimp image samples)

论文材料:

O. Ulucan, D. Karakaya and M. Turkan, "A Large-Scale Dataset for Fish Segmentation and Classification," 2020 Innovations in Intelligent Systems and Applications Conference (ASYU), 2020, pp. 1-5, doi: 10.1109/ASYU50717.2020.9259867.

数据集下载:

https://www.kaggle.com/crowww/a-large-scale-fish-dataset

图像格式:

    图像是通过2台不同的相机收集的,柯达Easyshare Z650 和三星 ST60。因此,图像的分辨率分别为2832x2128和1024x768。在分割和分类过程之前,数据集的大小已调整为590x445(保留纵横比)。调整图像大小后,数据集中的所有标签都得到了增强(通过翻转和旋转)。在增强过程结束时,每个类的总图像数量变为2000;其中1000是RGB图像和另外1000作为他们对应的ground-truth图像标签。

数据集的描述

    该数据集包含9种不同的海鲜类型。对于每个类,有1000个增强图像及ground-truth图像标签。每个类都可以在带有其真实标签的“Fish_Dataset”文件中找到。每个类的所有图像按“00000.png”到“01000.png”排序。例如,如果要访问数据集中虾真实标签图像,则应遵循"Fish->Shrimp->Shrimp GT"的顺序。

任务要求

1.图像分割:

训练一个深度神经网络分割出图片中海鲜物体,

1)在测试集上形成如下所示的双色图,每一海鲜种类生成一个,共9个;

2)统计分割结果,打印出训练集loss和accuracy和测试集loss和accuracy。

  2.图像分类

训练深度神经网络对海鲜图片数据集进行分类,在测试集上生成分类结果图9张(每一类各一张)(如下图,可在每个子图的标题上标记真实label和预测label);

图像分割训练结果如下

图像分类训练结果如下

图像分割

        图像分割我用的是Unet+ResNet34,模型也是从网上复制的。

        主要是完成了一些数据导入与训练。

        首先是数据导入部分,主要参考了小土堆的教程。

import torch
from torch.utils.data import Dataset, ConcatDataset, DataLoader
from PIL import Image
import os
import matplotlib.pyplot as plt
from torchvision import transforms


class MyData(Dataset):
    """Segmentation dataset pairing each RGB fish image with its ground-truth mask.

    Expected directory layout (per class, matching the Kaggle dataset):
        <root_dir>/<label_dir>/<label_dir>/*.png        -- RGB images
        <root_dir>/<label_dir>/<label_dir> GT/*.png     -- binary masks
    """

    # NOTE: the parameter name "transfrom" is a typo kept for backward
    # compatibility with existing keyword callers.
    def __init__(self, root_dir, label_dir, transfrom):
        self.root_dir = os.path.join(root_dir, label_dir)
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.mpath = os.path.join(self.root_dir, self.label_dir + " GT")
        # BUGFIX: os.listdir order is arbitrary; sort both listings so image
        # and mask files (which share names) line up index-for-index.
        self.img_path = sorted(os.listdir(self.path))
        self.mask_path = sorted(os.listdir(self.mpath))
        self.myTransforms = transfrom

    def __getitem__(self, idx):
        """Return (image_tensor, mask_tensor) for sample *idx*.

        The mask is returned as a long tensor of shape (H, W) with values
        {0, 1}, suitable as a CrossEntropyLoss target.
        """
        img_name = self.img_path[idx]
        mask_name = self.mask_path[idx]
        img_item_path = os.path.join(self.path, img_name)
        mask_item_path = os.path.join(self.mpath, mask_name)
        # Normalize modes: images to 3-channel RGB, masks to single-channel.
        img = Image.open(img_item_path).convert("RGB")
        label = Image.open(mask_item_path).convert("L")
        img = self.myTransforms(img)
        label = self.myTransforms(label)
        # BUGFIX: after ToTensor the mask is float in [0, 1]; Resize
        # interpolation produces fractional values, so a plain
        # .to(torch.long) would truncate almost everything to 0.
        # Threshold to a binary class map and drop the channel dim.
        label = (label > 0.5).long().squeeze(0)
        return img, label

    def __len__(self):
        return len(self.img_path)



Unet+Res34模型,Res34采用了预训练模型,训练速度很快

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import ResNet34_Weights


class DecoderBlock(nn.Module):
    """U-Net decoder stage: upsample, concatenate the skip connection, double conv.

    Channel configuration examples:
        decoder1: up 1024 -> 512, conv 1024 -> 512
        decoder5: up 64   -> 64,  conv 128  -> 64
    When the up-conv channels are not given they default to the double-conv
    channels (the common symmetric case).
    """

    def __init__(self, conv_in_channels, conv_out_channels, up_in_channels=None, up_out_channels=None):
        super().__init__()
        # IDIOM FIX: compare against None with `is`, not `==`.
        if up_in_channels is None:
            up_in_channels = conv_in_channels
        if up_out_channels is None:
            up_out_channels = conv_out_channels

        # 2x upsampling via transposed convolution.
        self.up = nn.ConvTranspose2d(up_in_channels, up_out_channels, kernel_size=2, stride=2)
        # Classic double 3x3 conv + BN + ReLU block.
        self.conv = nn.Sequential(
            nn.Conv2d(conv_in_channels, conv_out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(conv_out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(conv_out_channels, conv_out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(conv_out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x1, x2):
        """x1: tensor from the previous decoder stage (upsampled here);
        x2: skip-connection tensor from the encoder (same spatial size after upsampling)."""
        x1 = self.up(x1)
        x = torch.cat([x1, x2], dim=1)
        return self.conv(x)


class UnetResnet34(nn.Module):
    """U-Net segmentation network with an ImageNet-pretrained ResNet-34 encoder.

    The ResNet stem and its four residual stages form the contracting path;
    five DecoderBlocks consume the skip connections on the way back up, and a
    final transposed convolution restores the input resolution before the
    per-pixel class scores (default: 2 classes, background vs. object).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        backbone = torchvision.models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)
        stages = list(backbone.children())
        ch = [64, 128, 256, 512]  # output channels of ResNet-34 layer1..layer4

        # Stem: conv1 + bn1 + relu (stride 2); the max-pool is kept separate
        # so the pre-pool activation can serve as a skip connection.
        self.firstlayer = nn.Sequential(*stages[:3])
        self.maxpool = stages[3]
        self.encoder1 = backbone.layer1
        self.encoder2 = backbone.layer2
        self.encoder3 = backbone.layer3
        self.encoder4 = backbone.layer4

        # Bottleneck: widen to 1024 channels, halve the spatial size.
        self.bridge = nn.Sequential(
            nn.Conv2d(ch[3], ch[3] * 2, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(ch[3] * 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Expanding path; decoder5 has asymmetric channels because its skip
        # comes from the 64-channel stem output.
        self.decoder1 = DecoderBlock(conv_in_channels=ch[3] * 2, conv_out_channels=ch[3])
        self.decoder2 = DecoderBlock(conv_in_channels=ch[3], conv_out_channels=ch[2])
        self.decoder3 = DecoderBlock(conv_in_channels=ch[2], conv_out_channels=ch[1])
        self.decoder4 = DecoderBlock(conv_in_channels=ch[1], conv_out_channels=ch[0])
        self.decoder5 = DecoderBlock(
            conv_in_channels=ch[1], conv_out_channels=ch[0], up_in_channels=ch[0],
            up_out_channels=ch[0]
        )

        # Final 2x upsample back to input resolution, then class logits.
        self.lastlayer = nn.Sequential(
            nn.ConvTranspose2d(in_channels=ch[0], out_channels=ch[0], kernel_size=2, stride=2),
            nn.Conv2d(ch[0], num_classes, kernel_size=3, padding=1, bias=False)
        )

    def forward(self, x):
        """Return per-pixel class logits with the same spatial size as *x*."""
        skip0 = self.firstlayer(x)
        skip1 = self.encoder1(self.maxpool(skip0))
        skip2 = self.encoder2(skip1)
        skip3 = self.encoder3(skip2)
        skip4 = self.encoder4(skip3)

        bottleneck = self.bridge(skip4)

        up = self.decoder1(bottleneck, skip4)
        up = self.decoder2(up, skip3)
        up = self.decoder3(up, skip2)
        up = self.decoder4(up, skip1)
        up = self.decoder5(up, skip0)

        return self.lastlayer(up)

训练部分代码

# Root folder of the downloaded "A Large Scale Fish Dataset".
root_dir = "Fish_Dataset"

# Class folder names as they appear on disk. NOTE(review): "Hourse Mackerel"
# looks misspelled but presumably matches the dataset's actual folder name —
# verify against your local copy before "fixing" it.
kind_name = ["Black Sea Sprat", "Gilt Head Bream", "Hourse Mackerel", "Red Mullet",
             "Red Sea Bream", "Sea Bass", "Shrimp", "Striped Red Mullet", "Trout"]
# Accumulator for the per-class datasets; folded into a ConcatDataset below.
dataset = []


def train_test_split(dataset, test_size=0.2):
    """Randomly split *dataset* into (train_subset, test_subset).

    *test_size* is the held-out fraction; the train share is rounded to the
    nearest integer and the test split receives the remainder.
    """
    total = len(dataset)
    n_train = round(total * (1 - test_size))
    n_test = total - n_train
    train_subset, test_subset = random_split(dataset, [n_train, n_test])
    return train_subset, test_subset


# Build one dataset per class and chain them together.
# NOTE(review): the segmentation MyData defined above takes only
# (root_dir, label_dir, transfrom) — this 4-argument call matches the
# classification Read_Data.MyData signature instead; confirm which MyData is
# actually in scope here.
# NOTE(review): myTransforms and random_split must already be defined/imported
# at this point; they do not appear in this section's import block.
for i, name in enumerate(kind_name):
    dataset_temp = MyData(root_dir, name, i, myTransforms)
    # Repeated nesting works (a plain list also satisfies the dataset
    # protocol), but it builds an O(n)-deep ConcatDataset chain.
    dataset = ConcatDataset([dataset, dataset_temp])

train_data, test_data = train_test_split(dataset)
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集长度为{}".format(train_data_size))
print("测试数据集长度为{}".format(test_data_size))




# Wrap the splits in DataLoaders (shuffle not specified, so it defaults to False).
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Define the model.
# NOTE(review): this section's heading says "segmentation training", but the
# code below trains an image *classifier* (ResNet-50, 9-way) — confirm the
# right snippet was pasted here.
myModel = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)
# Replace ResNet-50's final fully-connected layer with a 9-unit classifier
# (one output per fish class). (Original comment said "ResNet18" — it is ResNet-50.)
inchannel = myModel.fc.in_features
myModel.fc = nn.Linear(inchannel, 9)

myModel.cuda()
# Loss function (CrossEntropyLoss expects raw logits + integer class targets).
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.cuda()

# Optimizer: plain SGD, no momentum or weight decay.
learning_rate = 0.001
optimizer = torch.optim.SGD(myModel.parameters(), lr=learning_rate)

# Bookkeeping counters.
# Number of training iterations seen so far.
total_train_step = 0
# Number of test iterations seen so far.
total_test_step = 0

# Number of epochs.
epoch = 10

# TensorBoard writer. NOTE(review): nothing is ever logged to it in this
# script; it is only closed at the end.
writer = SummaryWriter("./ResNet_train")

for i in range(epoch):
    since = time.time()
    print("----------------第{}轮训练开始----------------".format(i+1))
    total_train_loss = 0
    total_train_accuracy = 0
    # BUGFIX: switch back to train mode at the start of every epoch; the
    # eval() call below would otherwise leave BatchNorm/Dropout frozen in
    # inference mode from the second epoch onward.
    myModel.train()
    # --- training pass ---
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.cuda()
        targets = targets.cuda()
        outputs = myModel(imgs)
        loss = loss_fn(outputs, targets)

        # Standard optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_loss = total_train_loss + loss.item()
        total_train_step = total_train_step + 1
        # BUGFIX: .item() converts the CUDA tensor to a Python number so the
        # running total is not a growing chain of device tensors.
        train_accuracy = (outputs.argmax(1) == targets).sum().item()
        total_train_accuracy = total_train_accuracy + train_accuracy

    # --- evaluation pass ---
    total_test_loss = 0
    total_test_accuracy = 0
    myModel.eval()
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.cuda()
            targets = targets.cuda()
            outputs = myModel(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            test_accuracy = (outputs.argmax(1) == targets).sum().item()
            total_test_step = total_test_step + 1
            total_test_accuracy = total_test_accuracy + test_accuracy
    print("整体训练集上的Loss: {}".format(total_train_loss))
    print("整体训练集上的正确率: {}".format(total_train_accuracy/train_data_size))
    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的正确率: {}".format(total_test_accuracy/test_data_size))
    print("第{}轮耗时{:.2f}min".format(i+1,(time.time()-since)/60))
    # BUGFIX: removed the stray per-epoch total_test_step increment here;
    # the counter already advances once per test batch above.
writer.close()

预测部分代码,可自己另作修改,注意预测代码的图片也需要调整大小。

import os
import matplotlib.pyplot as plt
import numpy as np
import torch.optim
import torchvision
from torch import nn
from torch.utils.data import DataLoader, random_split, ConcatDataset
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
from Read_Data_predit import MyData
from PIL import Image
# Resize to the 256x256 input size the segmentation network expects.
myTransforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
   ])

# Prepare dataset paths.
root_dir = "Fish_Dataset"

kind_name = ["Black Sea Sprat", "Gilt Head Bream", "Hourse Mackerel", "Red Mullet",
             "Red Sea Bream", "Sea Bass", "Shrimp", "Striped Red Mullet", "Trout"]
dataset = []


# One row per class: original image, ground-truth mask, predicted mask.
fig, axes = plt.subplots(9, 3)
# NOTE(review): torch.load of a whole pickled model requires the
# UnetResnet34/DecoderBlock class definitions to be importable here, and
# restores tensors to the device they were saved from — pass map_location
# when running on a CPU-only machine.
myModel = torch.load("Res34_Unet.pth")
# fig, axes = plt.subplots(10, 2, figsize=(10, 50))
# for i in range(10):
#     image, mask = dataset[i]
#     axes[i, 0].imshow(image.permute(1, 2, 0).numpy().reshape(445, 590, 3))
#     axes[i, 1].imshow(mask.numpy().reshape(256, 256))  # input size : 445x590
# plt.show()
myModel.eval()
with torch.no_grad():
    for i, name in enumerate(kind_name):
        img_root_dir = os.path.join(root_dir,name)
        mask_root_dir = os.path.join(root_dir, name)
        img_item_path = os.path.join(img_root_dir, name, "00010.png")  # one fixed sample (file 00010) per class
        mask_item_path = os.path.join(mask_root_dir, name+" GT", "00010.png")  # its ground-truth mask
        img = Image.open(img_item_path)
        mask = Image.open(mask_item_path)
        data_transform = myTransforms(img)
        data_transform = data_transform.unsqueeze(dim=0)  # add batch dimension
        data_transform = data_transform.cuda()
        output = myModel(data_transform)
        output = torch.squeeze(output) # drop the batch dimension
        output = torch.squeeze(output[1]) # channel 0 = background, channel 1 = object
        output = output.cpu()
        output = output.numpy().reshape(256, 256)
        # NOTE(review): these are raw logits, not probabilities — thresholding
        # at 0.5 is not equivalent to softmax > 0.5 (that would be logit
        # difference > 0); confirm this is intended.
        binary_predicted_mask = np.where(output > 0.5, 1, 0)
        axes[i, 0].imshow(img.resize((256, 256)))
        axes[i, 1].imshow(mask.resize((256, 256)))
        axes[i, 2].imshow(binary_predicted_mask)
plt.show()

图像识别

        这部分就比较简单了,直接使用预训练模型ResNet50。

        首先是数据导入。

import torch
from torch.utils.data import Dataset, ConcatDataset, random_split
from PIL import Image
import os
import numpy as np


class MyData(Dataset):
    """Classification dataset: every image in one class folder shares label *class_num*.

    Directory layout: <root_dir>/<label_dir>/<label_dir>/*.png, matching the
    Fish_Dataset structure.
    """

    def __init__(self, root_dir, label_dir, class_num, transforms):
        self.root_dir = os.path.join(root_dir, label_dir)
        self.label_dir = label_dir
        self.class_num = class_num
        self.transforms = transforms
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        """Return (transformed_image, class_index) for sample *idx*."""
        file_name = self.img_path[idx]
        full_path = os.path.join(self.root_dir, self.label_dir, file_name)
        image = self.transforms(Image.open(full_path))
        return image, self.class_num

    def __len__(self):
        return len(self.img_path)


# root_dir = "Fish_Dataset"
#
# kind_name = ["Black Sea Sprat", "Gilt Head Bream", "Hourse Mackerel", "Red Mullet",
#              "Red Sea Bream", "Sea Bass", "Shrimp", "Striped Red Mullet", "Trout"]
# dataset = []
#
#
# def train_test_split(dataset, test_size=0.2):
#     length = len(dataset)
#     train_length = round(length * (1 - test_size))
#     test_length = length - train_length
#
#     train_dataset, test_dataset = random_split(dataset, [train_length, test_length])
#     return train_dataset, test_dataset
#
#
# for i, name in enumerate(kind_name):
#     dataset_temp = MyData(root_dir, name, i)
#     dataset = ConcatDataset([dataset, dataset_temp])
#
# train_data, test_data = train_test_split(dataset)
# train_data_size = len(train_data)
# test_data_size = len(test_data)
# print("训练数据集长度为{}".format(train_data_size))
# print("测试数据集长度为{}".format(test_data_size))

使用torchvision自带的模型resnet50,基本3轮准确率就可以到90%以上。

import time

import torch.optim
import torchvision
from torch import nn
from torch.utils.data import DataLoader, random_split, ConcatDataset
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
from Read_Data import MyData

# Resize to ResNet's 224x224 input and normalize with the standard ImageNet
# channel mean/std that the pretrained weights were trained with.
myTransforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

#准备数据集
# Root folder of the downloaded "A Large Scale Fish Dataset".
root_dir = "Fish_Dataset"

# Class folder names as they appear on disk; the list index doubles as the
# class label. NOTE(review): "Hourse Mackerel" presumably matches the actual
# folder name — verify before changing the spelling.
kind_name = ["Black Sea Sprat", "Gilt Head Bream", "Hourse Mackerel", "Red Mullet",
             "Red Sea Bream", "Sea Bass", "Shrimp", "Striped Red Mullet", "Trout"]
# Accumulator for the per-class datasets; folded into a ConcatDataset below.
dataset = []


def train_test_split(dataset, test_size=0.2):
    """Randomly partition *dataset* into (train_subset, test_subset).

    The train split gets round(len * (1 - test_size)) samples; the test
    split gets whatever remains.
    """
    total = len(dataset)
    n_train = round(total * (1 - test_size))
    n_test = total - n_train
    train_subset, test_subset = random_split(dataset, [n_train, n_test])
    return train_subset, test_subset


# Build one MyData per class (label = index in kind_name) and chain them.
# Seeding the fold with `dataset = []` works because a plain list satisfies
# the dataset protocol, but it creates an O(n)-deep nested ConcatDataset.
for i, name in enumerate(kind_name):
    dataset_temp = MyData(root_dir, name, i, myTransforms)
    dataset = ConcatDataset([dataset, dataset_temp])

train_data, test_data = train_test_split(dataset)
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集长度为{}".format(train_data_size))
print("测试数据集长度为{}".format(test_data_size))




# Wrap the splits in DataLoaders (shuffle not specified, so it defaults to False).
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Define the model: ImageNet-pretrained ResNet-50 from torchvision.
myModel = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)
# Replace ResNet-50's final fully-connected layer with a 9-unit classifier
# (one output per fish class). (Original comment said "ResNet18" — it is ResNet-50.)
inchannel = myModel.fc.in_features
myModel.fc = nn.Linear(inchannel, 9)

myModel.cuda()
# Loss function (CrossEntropyLoss expects raw logits + integer class targets).
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.cuda()

# Optimizer: plain SGD, no momentum or weight decay.
learning_rate = 0.001
optimizer = torch.optim.SGD(myModel.parameters(), lr=learning_rate)

# Bookkeeping counters.
# Number of training iterations seen so far.
total_train_step = 0
# Number of test iterations seen so far.
total_test_step = 0

# Number of epochs.
epoch = 10

# TensorBoard writer. NOTE(review): nothing is ever logged to it in this
# script; it is only closed at the end.
writer = SummaryWriter("./ResNet_train")

for i in range(epoch):
    since = time.time()
    print("----------------第{}轮训练开始----------------".format(i+1))
    total_train_loss = 0
    total_train_accuracy = 0
    # BUGFIX: switch back to train mode at the start of every epoch; the
    # eval() call below would otherwise leave BatchNorm/Dropout frozen in
    # inference mode from the second epoch onward.
    myModel.train()
    # --- training pass ---
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.cuda()
        targets = targets.cuda()
        outputs = myModel(imgs)
        loss = loss_fn(outputs, targets)

        # Standard optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_loss = total_train_loss + loss.item()
        total_train_step = total_train_step + 1
        # BUGFIX: .item() converts the CUDA tensor to a Python number so the
        # running total is not a growing chain of device tensors.
        train_accuracy = (outputs.argmax(1) == targets).sum().item()
        total_train_accuracy = total_train_accuracy + train_accuracy

    # --- evaluation pass ---
    total_test_loss = 0
    total_test_accuracy = 0
    myModel.eval()
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.cuda()
            targets = targets.cuda()
            outputs = myModel(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            test_accuracy = (outputs.argmax(1) == targets).sum().item()
            total_test_step = total_test_step + 1
            total_test_accuracy = total_test_accuracy + test_accuracy
    print("整体训练集上的Loss: {}".format(total_train_loss))
    print("整体训练集上的正确率: {}".format(total_train_accuracy/train_data_size))
    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的正确率: {}".format(total_test_accuracy/test_data_size))
    print("第{}轮耗时{:.2f}min".format(i+1,(time.time()-since)/60))
    # BUGFIX: removed the stray per-epoch total_test_step increment; the
    # counter already advances once per test batch above.
# BUGFIX: the original called torch.save(model, ...) but no name `model`
# exists in this script (NameError) — the trained network is `myModel`.
torch.save(myModel, 'Resnet50_fish.pth')
writer.close()
#以下结果预测代码
import os
import matplotlib.pyplot as plt
import torch.optim
import torchvision
from torch import nn
from torch.utils.data import DataLoader, random_split, ConcatDataset
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
from Read_Data_predit import MyData
from PIL import Image
# Same preprocessing as training: 224x224 + ImageNet normalization.
myTransforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

# Prepare the dataset.
# NOTE(review): this uses "NA_Fish_Dataset" (presumably the non-augmented
# image folder) while training used "Fish_Dataset" — confirm the intended
# evaluation images and directory layout.
root_dir = "NA_Fish_Dataset"

kind_name = ["Black Sea Sprat", "Gilt Head Bream", "Hourse Mackerel", "Red Mullet",
             "Red Sea Bream", "Sea Bass", "Shrimp", "Striped Red Mullet", "Trout"]
# Unused in this prediction script.
dataset = []


# 3x3 grid: one subplot per fish class, titled with true and predicted labels.
fig, axes = plt.subplots(3, 3)
# NOTE(review): loading a whole pickled model restores tensors to the device
# it was saved from; pass map_location when running on a CPU-only machine.
myModel = torch.load("Resnet50_fish.pth")

myModel.eval()
with torch.no_grad():
    for i, name in enumerate(kind_name):
        img_item_path = os.path.join(root_dir, name, "00001.png")  # one fixed sample per class
        img = Image.open(img_item_path)
        axes[i // 3, i % 3].imshow(img)
        data_transform = myTransforms(img)
        data_transform = data_transform.unsqueeze(dim=0)  # add batch dimension
        data_transform = data_transform.cuda()
        output = myModel(data_transform)
        # BUGFIX: argmax returns a 1-element tensor; indexing the kind_name
        # list with a tensor raises TypeError, so extract the int with .item().
        pred = output.argmax(1).item()
        # BUGFIX: the original title string literal contained a raw line
        # break inside the quotes (a SyntaxError); use an explicit \n escape.
        axes[i // 3, i % 3].set_title("True:" + kind_name[i] + "\nPredited:" + kind_name[pred], loc="center")
plt.show()