您当前的位置：首页 > IT编程 > python
\| C语言 \| Java \| VB \| VC \| python \| Android \| TensorFlow \| C++ \| oracle \| 学术与代码 \| cnn卷积神经网络 \| gnn \| 图像修复 \| Keras \| 数据集 \| Neo4j \| 自然语言处理 \| 深度学习 \| 医学CAD \| 医学影像 \| 超参数 \| pointnet \| pytorch \| 异常检测 \| Transformers \| 情感分类 \| 知识图谱 \|

自学教程：Python实现GIF动图以及视频卡通化详解

51自学网 2022-02-21 10:46:18

python

这篇教程Python实现GIF动图以及视频卡通化详解写得很实用，希望能帮到您。

前言

参考文章：Python实现照片卡通化

我继续魔改一下，让该模型可以支持将gif动图或者视频也做成卡通化效果。毕竟一张图可以，那就代表视频也可以，没毛病。所以继给次元壁来了一拳之后，我再加两脚。

项目github地址：github地址

环境依赖

除了参考文章中的依赖，还需要加一些其他依赖，requirements.txt如下：

其他环境不太清楚的，可以看我前言链接地址的文章，有具体说明。

核心代码

不废话了，先上gif代码。

gif动图卡通化

实现代码如下：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2021/12/5 18:10
# @Author  : 剑客阿良_ALiang
# @Site    :
# @File    : gif_cartoon_tool.py
#
# Derived from video_cartoon_tool.py (2021/12/5 0:26) and
# image_cartoon_tool.py (2021/12/4 22:34) by the same author.
"""Cartoonize every frame of an animated GIF with a pretrained generator."""

from PIL import Image, ImageEnhance, ImageSequence
import torch
from torchvision.transforms.functional import to_tensor, to_pil_image
from torch import nn
import os
import torch.nn.functional as F
import uuid
import imageio


# -------------------------- hy add 01 --------------------------
class ConvNormLReLU(nn.Sequential):
    """Padding -> Conv2d -> GroupNorm(1 group) -> LeakyReLU(0.2) stack.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Argument forwarded to the padding layer (an int or a
            4-tuple).
        pad_mode: One of ``"zero"``, ``"same"`` (replication) or
            ``"reflect"``.
        groups: Convolution groups.
        bias: Whether the convolution has a bias term.

    Raises:
        NotImplementedError: If ``pad_mode`` is not a supported key.
    """

    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1,
                 pad_mode="reflect", groups=1, bias=False):
        pad_layer = {
            "zero": nn.ZeroPad2d,
            "same": nn.ReplicationPad2d,
            "reflect": nn.ReflectionPad2d,
        }
        if pad_mode not in pad_layer:
            raise NotImplementedError

        # Padding is done by the explicit pad layer, so the convolution
        # itself uses padding=0.
        super().__init__(
            pad_layer[pad_mode](padding),
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride,
                      padding=0, groups=groups, bias=bias),
            nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True),
            nn.LeakyReLU(0.2, inplace=True),
        )


class InvertedResBlock(nn.Module):
    """Expand (1x1) -> depthwise (3x3) -> project (1x1) block.

    A residual connection is added only when ``in_ch == out_ch``.
    """

    def __init__(self, in_ch, out_ch, expansion_ratio=2):
        super().__init__()

        self.use_res_connect = in_ch == out_ch
        bottleneck = int(round(in_ch * expansion_ratio))
        layers = []
        if expansion_ratio != 1:
            # 1x1 expansion to the bottleneck width.
            layers.append(ConvNormLReLU(in_ch, bottleneck, kernel_size=1, padding=0))

        # dw: depthwise convolution (one group per channel).
        layers.append(ConvNormLReLU(bottleneck, bottleneck, groups=bottleneck, bias=True))
        # pw: pointwise projection back to out_ch, followed by normalization.
        layers.append(nn.Conv2d(bottleneck, out_ch, kernel_size=1, padding=0, bias=False))
        layers.append(nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True))

        self.layers = nn.Sequential(*layers)

    def forward(self, input):
        out = self.layers(input)
        if self.use_res_connect:
            out = input + out
        return out


class Generator(nn.Module):
    """Image-to-image generator used for cartoonization.

    NOTE: attribute names and layer order define the ``state_dict`` keys,
    so they must stay in sync with the checkpoint files that are loaded.
    """

    def __init__(self, ):
        super().__init__()

        # Two stride-2 convolutions (one here, one in block_b) halve the
        # spatial size twice; forward() upsamples twice to undo that.
        self.block_a = nn.Sequential(
            ConvNormLReLU(3, 32, kernel_size=7, padding=3),
            ConvNormLReLU(32, 64, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(64, 64)
        )

        self.block_b = nn.Sequential(
            ConvNormLReLU(64, 128, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(128, 128)
        )

        self.block_c = nn.Sequential(
            ConvNormLReLU(128, 128),
            InvertedResBlock(128, 256, 2),
            InvertedResBlock(256, 256, 2),
            InvertedResBlock(256, 256, 2),
            InvertedResBlock(256, 256, 2),
            ConvNormLReLU(256, 128),
        )

        self.block_d = nn.Sequential(
            ConvNormLReLU(128, 128),
            ConvNormLReLU(128, 128)
        )

        self.block_e = nn.Sequential(
            ConvNormLReLU(128, 64),
            ConvNormLReLU(64, 64),
            ConvNormLReLU(64, 32, kernel_size=7, padding=3)
        )

        self.out_layer = nn.Sequential(
            nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False),
            nn.Tanh()
        )

    def forward(self, input, align_corners=True):
        """Run the generator.

        Args:
            input: Image batch tensor with 3 channels.
            align_corners: When true, upsample to the exact sizes recorded
                on the way down; when false, upsample by a fixed factor of 2
                (the result may then differ in size from ``input``).

        Returns:
            Tensor with 3 channels in the Tanh range [-1, 1].
        """
        out = self.block_a(input)
        half_size = out.size()[-2:]
        out = self.block_b(out)
        out = self.block_c(out)

        if align_corners:
            out = F.interpolate(out, half_size, mode="bilinear", align_corners=True)
        else:
            out = F.interpolate(out, scale_factor=2, mode="bilinear", align_corners=False)
        out = self.block_d(out)

        if align_corners:
            out = F.interpolate(out, input.size()[-2:], mode="bilinear", align_corners=True)
        else:
            out = F.interpolate(out, scale_factor=2, mode="bilinear", align_corners=False)
        out = self.block_e(out)

        out = self.out_layer(out)
        return out


# -------------------------- hy add 02 --------------------------

# Maps the ``type`` argument of handle() to the checkpoint file to load.
_CHECKPOINTS = {
    1: './weights/paprika.pt',
    2: './weights/face_paint_512_v1.pt',
    3: './weights/face_paint_512_v2.pt',
    4: './weights/celeba_distill.pt',
}


def handle(gif_path: str, output_dir: str, type: int, device='cpu'):
    """Cartoonize an animated GIF frame by frame.

    Args:
        gif_path: Path of the source GIF.
        output_dir: Directory for the result; created if missing.
        type: Model selector, 1-4 (see ``_CHECKPOINTS``).
        device: Torch device string, e.g. ``'cpu'`` or ``'cuda'``.

    Returns:
        Path of the written file (random name, same extension as the input).

    Raises:
        Exception: If ``type`` is not one of the supported selectors.
    """
    _ext = os.path.basename(gif_path).strip().split('.')[-1]
    if type not in _CHECKPOINTS:
        raise Exception('type not support')
    _checkpoint = _CHECKPOINTS[type]
    os.makedirs(output_dir, exist_ok=True)

    net = Generator()
    net.load_state_dict(torch.load(_checkpoint, map_location="cpu"))
    net.to(device).eval()

    result = os.path.join(output_dir, '{}.{}'.format(uuid.uuid1().hex, _ext))
    out_images = []
    # The context manager closes the source file once all frames are read;
    # no_grad is entered once instead of once per frame.
    with Image.open(gif_path) as img, torch.no_grad():
        for frame in ImageSequence.Iterator(img):
            frame = frame.convert("RGB")
            # Scale [0, 1] pixel values to the [-1, 1] range fed to the net.
            image = to_tensor(frame).unsqueeze(0) * 2 - 1
            out = net(image.to(device), False).cpu()
            # Map the output back from [-1, 1] to [0, 1].
            out = out.squeeze(0).clip(-1, 1) * 0.5 + 0.5
            out_images.append(to_pil_image(out))
    imageio.mimsave(result, out_images, fps=15)
    return result


if __name__ == '__main__':
    print(handle('samples/gif/128.gif', 'samples/gif_result/', 3, 'cuda'))

代码说明：

1、主要的handle方法入参分别为：gif地址、输出目录、类型、设备使用（默认cpu，可选cuda使用显卡）。

2、类型主要是选择模型，最好用3，人像处理更生动一些。

执行验证一下

下面是我准备的gif素材

执行结果如下

看一下效果

哈哈，有点意思哦。

视频卡通化

实现代码如下：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2021/12/5 0:26
# @Author  : 剑客阿良_ALiang
# @Site    :
# @File    : video_cartoon_tool.py
#
# Derived from image_cartoon_tool.py (2021/12/4 22:34) by the same author.
"""Cartoonize a video frame by frame and re-attach its original audio."""

from PIL import Image, ImageEnhance
import torch
from torchvision.transforms.functional import to_tensor, to_pil_image
from torch import nn
import os
import torch.nn.functional as F
import uuid
import cv2
import numpy as np
import time
from ffmpy import FFmpeg


# -------------------------- hy add 01 --------------------------
class ConvNormLReLU(nn.Sequential):
    """Padding -> Conv2d -> GroupNorm(1 group) -> LeakyReLU(0.2) stack.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Argument forwarded to the padding layer (an int or a
            4-tuple).
        pad_mode: One of ``"zero"``, ``"same"`` (replication) or
            ``"reflect"``.
        groups: Convolution groups.
        bias: Whether the convolution has a bias term.

    Raises:
        NotImplementedError: If ``pad_mode`` is not a supported key.
    """

    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1,
                 pad_mode="reflect", groups=1, bias=False):
        pad_layer = {
            "zero": nn.ZeroPad2d,
            "same": nn.ReplicationPad2d,
            "reflect": nn.ReflectionPad2d,
        }
        if pad_mode not in pad_layer:
            raise NotImplementedError

        # Padding is done by the explicit pad layer, so the convolution
        # itself uses padding=0.
        super().__init__(
            pad_layer[pad_mode](padding),
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride,
                      padding=0, groups=groups, bias=bias),
            nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True),
            nn.LeakyReLU(0.2, inplace=True),
        )


class InvertedResBlock(nn.Module):
    """Expand (1x1) -> depthwise (3x3) -> project (1x1) block.

    A residual connection is added only when ``in_ch == out_ch``.
    """

    def __init__(self, in_ch, out_ch, expansion_ratio=2):
        super().__init__()

        self.use_res_connect = in_ch == out_ch
        bottleneck = int(round(in_ch * expansion_ratio))
        layers = []
        if expansion_ratio != 1:
            # 1x1 expansion to the bottleneck width.
            layers.append(ConvNormLReLU(in_ch, bottleneck, kernel_size=1, padding=0))

        # dw: depthwise convolution (one group per channel).
        layers.append(ConvNormLReLU(bottleneck, bottleneck, groups=bottleneck, bias=True))
        # pw: pointwise projection back to out_ch, followed by normalization.
        layers.append(nn.Conv2d(bottleneck, out_ch, kernel_size=1, padding=0, bias=False))
        layers.append(nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True))

        self.layers = nn.Sequential(*layers)

    def forward(self, input):
        out = self.layers(input)
        if self.use_res_connect:
            out = input + out
        return out


class Generator(nn.Module):
    """Image-to-image generator used for cartoonization.

    NOTE: attribute names and layer order define the ``state_dict`` keys,
    so they must stay in sync with the checkpoint files that are loaded.
    """

    def __init__(self, ):
        super().__init__()

        # Two stride-2 convolutions (one here, one in block_b) halve the
        # spatial size twice; forward() upsamples twice to undo that.
        self.block_a = nn.Sequential(
            ConvNormLReLU(3, 32, kernel_size=7, padding=3),
            ConvNormLReLU(32, 64, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(64, 64)
        )

        self.block_b = nn.Sequential(
            ConvNormLReLU(64, 128, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(128, 128)
        )

        self.block_c = nn.Sequential(
            ConvNormLReLU(128, 128),
            InvertedResBlock(128, 256, 2),
            InvertedResBlock(256, 256, 2),
            InvertedResBlock(256, 256, 2),
            InvertedResBlock(256, 256, 2),
            ConvNormLReLU(256, 128),
        )

        self.block_d = nn.Sequential(
            ConvNormLReLU(128, 128),
            ConvNormLReLU(128, 128)
        )

        self.block_e = nn.Sequential(
            ConvNormLReLU(128, 64),
            ConvNormLReLU(64, 64),
            ConvNormLReLU(64, 32, kernel_size=7, padding=3)
        )

        self.out_layer = nn.Sequential(
            nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False),
            nn.Tanh()
        )

    def forward(self, input, align_corners=True):
        """Run the generator.

        Args:
            input: Image batch tensor with 3 channels.
            align_corners: When true, upsample to the exact sizes recorded
                on the way down; when false, upsample by a fixed factor of 2
                (the result may then differ in size from ``input``).

        Returns:
            Tensor with 3 channels in the Tanh range [-1, 1].
        """
        out = self.block_a(input)
        half_size = out.size()[-2:]
        out = self.block_b(out)
        out = self.block_c(out)

        if align_corners:
            out = F.interpolate(out, half_size, mode="bilinear", align_corners=True)
        else:
            out = F.interpolate(out, scale_factor=2, mode="bilinear", align_corners=False)
        out = self.block_d(out)

        if align_corners:
            out = F.interpolate(out, input.size()[-2:], mode="bilinear", align_corners=True)
        else:
            out = F.interpolate(out, scale_factor=2, mode="bilinear", align_corners=False)
        out = self.block_e(out)

        out = self.out_layer(out)
        return out


# -------------------------- hy add 02 --------------------------

# Maps the ``type`` argument of handle() to the checkpoint file to load.
_CHECKPOINTS = {
    1: './weights/paprika.pt',
    2: './weights/face_paint_512_v1.pt',
    3: './weights/face_paint_512_v2.pt',
    4: './weights/celeba_distill.pt',
}

# Audio container formats accepted by extract() and video_add_audio().
_AUDIO_FORMATS = ('mp3', 'wav')


def handle(video_path: str, output_dir: str, type: int, fps: int, device='cpu'):
    """Cartoonize a video and mux the original audio back in.

    Steps: extract the audio track, run every frame through the generator
    and write it to a new silent video, then merge that video with the
    extracted audio. The intermediate audio and silent video files are
    left in ``output_dir``.

    Args:
        video_path: Path of the source video.
        output_dir: Directory for intermediate and final files; created if
            missing.
        type: Model selector, 1-4 (see ``_CHECKPOINTS``).
        fps: Frame rate of the written video.
        device: Torch device string, e.g. ``'cpu'`` or ``'cuda'``.

    Returns:
        Path of the final video with audio.

    Raises:
        Exception: If ``type`` is not one of the supported selectors.
    """
    _ext = os.path.basename(video_path).strip().split('.')[-1]
    if type not in _CHECKPOINTS:
        raise Exception('type not support')
    _checkpoint = _CHECKPOINTS[type]
    os.makedirs(output_dir, exist_ok=True)
    # Extract the audio track of the source video.
    _audio = extract(video_path, output_dir, 'wav')

    net = Generator()
    net.load_state_dict(torch.load(_checkpoint, map_location="cpu"))
    net.to(device).eval()

    result = os.path.join(output_dir, '{}.{}'.format(uuid.uuid1().hex, _ext))
    capture = cv2.VideoCapture(video_path)
    videoWriter = None
    try:
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        print(size)
        videoWriter = cv2.VideoWriter(result, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
        cul = 0
        with torch.no_grad():
            while True:
                ret, frame = capture.read()
                if not ret:
                    break
                # OpenCV decodes frames as BGR, while the rest of the
                # pipeline (to_pil_image / ImageEnhance) works in RGB.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Scale [0, 1] pixel values to [-1, 1] for the network.
                image = to_tensor(rgb).unsqueeze(0) * 2 - 1
                out = net(image.to(device), False).cpu()
                # Map the output back from [-1, 1] to [0, 1].
                out = out.squeeze(0).clip(-1, 1) * 0.5 + 0.5
                out = to_pil_image(out)
                enhanced = ImageEnhance.Contrast(out).enhance(2)
                # Back to BGR, which is what VideoWriter expects.
                bgr = cv2.cvtColor(np.asarray(enhanced), cv2.COLOR_RGB2BGR)
                # With align_corners=False the network output can be a few
                # pixels smaller than the source when a dimension is not a
                # multiple of 4; the writer needs frames of exactly `size`.
                if (bgr.shape[1], bgr.shape[0]) != size:
                    bgr = cv2.resize(bgr, size)
                videoWriter.write(bgr)
                cul += 1
                print('第{}张图'.format(cul))
    finally:
        # Always release the native handles, even if a frame fails.
        capture.release()
        if videoWriter is not None:
            videoWriter.release()
    # Add the original audio to the new video.
    _final_video = video_add_audio(result, _audio, output_dir)
    return _final_video


# -------------------------- hy add 03 --------------------------
def extract(video_path: str, tmp_dir: str, ext: str):
    """Extract the audio track of a video into ``tmp_dir``.

    Args:
        video_path: Path of the source video.
        tmp_dir: Directory that receives the audio file.
        ext: Target audio format, ``'mp3'`` or ``'wav'``.

    Returns:
        Path of the written audio file (random name).

    Raises:
        Exception: If ``ext`` is not a supported audio format.
    """
    file_name = '.'.join(os.path.basename(video_path).split('.')[0:-1])
    print('文件名:{}，提取音频'.format(file_name))
    if ext not in _AUDIO_FORMATS:
        # Fail loudly instead of returning None and crashing later.
        raise Exception('audio format not support')
    audio_path = os.path.join(tmp_dir, '{}.{}'.format(uuid.uuid1().hex, ext))
    return _run_ffmpeg(video_path, audio_path, ext)


def _run_ffmpeg(video_path: str, audio_path: str, format: str):
    """Run ffmpeg to write the audio of ``video_path`` to ``audio_path``.

    ``-vn`` drops the video stream; ``-f`` forces the output format.
    Returns ``audio_path``.
    """
    ff = FFmpeg(inputs={video_path: None},
                outputs={audio_path: '-f {} -vn'.format(format)})
    print(ff.cmd)
    ff.run()
    return audio_path


# Add audio to a video.
def video_add_audio(video_path: str, audio_path: str, output_dir: str):
    """Mux ``audio_path`` into ``video_path`` and write a new file.

    The video stream is copied unchanged. WAV audio is encoded to AAC,
    MP3 audio is copied. ``-shortest`` ends the output with the shorter
    of the two inputs.

    Args:
        video_path: Path of the (silent) video.
        audio_path: Path of the audio file, ``.mp3`` or ``.wav``.
        output_dir: Directory that receives the merged file.

    Returns:
        Path of the merged video (random name, same extension as the video).

    Raises:
        Exception: If the audio extension is not supported.
    """
    _ext_video = os.path.basename(video_path).strip().split('.')[-1]
    _ext_audio = os.path.basename(audio_path).strip().split('.')[-1]
    if _ext_audio not in ['mp3', 'wav']:
        raise Exception('audio format not support')
    _codec = 'aac' if _ext_audio == 'wav' else 'copy'
    result = os.path.join(output_dir, '{}.{}'.format(uuid.uuid4(), _ext_video))
    ff = FFmpeg(
        inputs={video_path: None, audio_path: None},
        outputs={result: '-map 0:v -map 1:a -c:v copy -c:a {} -shortest'.format(_codec)})
    print(ff.cmd)
    ff.run()
    return result


if __name__ == '__main__':
    print(handle('samples/video/981.mp4', 'samples/video_result/', 3, 25, 'cuda'))

代码说明

1、主要的实现方法入参分别为：视频地址、输出目录、类型、fps（帧率）、设备类型（默认cpu，可选择cuda显卡模式）。

2、类型主要是选择模型，最好用3，人像处理更生动一些。

3、代码设计思路：先将视频音频提取出来、将视频逐帧处理后写入新视频、新视频和原视频音频融合。

关于如何从视频中提取音频，可以参考我的另一篇文章：python 提取视频中的音频

关于如何为视频添加音频，可以参考我的另一篇文章：Python 视频添加音频

4、视频处理过程中会产生临时文件（提取出的音频、未加音频的中间视频），代码中没有清理，如有需要可以自行修改代码进行清理。

验证一下

下面是我准备的视频素材截图，我会上传到github上。