您当前的位置:首页 > IT编程 > python
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:python简单实现图片文字分割

51自学网 2021-10-30 22:13:58
  python
这篇教程《python简单实现图片文字分割》写得很实用,希望能帮到您。

本文实例为大家分享了python简单实现图片文字分割的具体代码,供大家参考,具体内容如下

原图:

图片预处理:图片二值化以及图片降噪处理。

# Image binarization
def binarization(img, threshold):
    """Return a pure black/white copy of ``img``.

    The weighted luminance ``0.30*R + 0.59*G + 0.11*B`` of every pixel is
    compared with ``threshold``: pixels at or below it become black
    ``(0, 0, 0)``, all others white ``(255, 255, 255)``.

    Args:
        img: A Pillow image.  Modes other than RGB/RGBA are converted to RGB
            first, because the loop indexes each pixel as an (R, G, B, ...)
            tuple and single-band modes return a bare number instead.
        threshold: Luminance cut-off.

    Returns:
        A new image; ``img`` itself is never modified.
    """
    if img.mode not in ("RGB", "RGBA"):
        img = img.convert("RGB")

    black, white = (0, 0, 0), (255, 255, 255)
    width, height = img.size
    im_new = img.copy()
    for i in range(width):
        for j in range(height):
            pixel = img.getpixel((i, j))
            luminance = 0.30 * pixel[0] + 0.59 * pixel[1] + 0.11 * pixel[2]
            im_new.putpixel((i, j), black if luminance <= threshold else white)
    return im_new
# Image denoising
def clear_noise(img):
    """Whiten isolated dark pixels of a binarized image, in place.

    Every pixel is scored with ``sum_9_region`` and turned white when the
    score is below 600.  For a strictly black/white image the score is 255
    times the number of black pixels in the 3x3 window (centre included), so
    "below 600" means the black pixel has at most one black neighbour.

    The whole image is scanned, including the last column and the last row;
    that is safe because ``sum_9_region`` only reads in-bounds pixels.  The
    scan mutates ``img`` as it goes, so later windows see pixels that were
    already whitened.

    Returns:
        The same ``img`` object, after displaying it with ``img.show()``.
    """
    width, height = img.width, img.height
    for i in range(width):
        for j in range(height):
            if sum_9_region(img, i, j) < 600:
                # Too few dark neighbours: treat as noise and paint it white.
                img.putpixel((i, j), (255, 255, 255))
    img.show()
    return img


# Darkness score of the 3x3 window around one pixel
def sum_9_region(img, x, y):
    """Score how dark the neighbourhood of pixel ``(x, y)`` is.

    Only the first channel of each pixel is read.

    Returns:
        2550 when the centre pixel's first channel is 255 (white pixels are
        not scored; 2550 exceeds the largest real score of 9 * 255).
        Otherwise ``n * 255 - s`` where ``n`` is the number of window cells
        that lie inside the image (9 in the interior, 6 on an edge, 4 in a
        corner) and ``s`` is the sum of their first-channel values.
    """
    if img.getpixel((x, y))[0] == 255:
        return 2550

    width, height = img.width, img.height
    cells = 0
    total = 0
    for nx in (x - 1, x, x + 1):
        if not 0 <= nx < width:
            continue
        for ny in (y - 1, y, y + 1):
            if not 0 <= ny < height:
                continue
            # Reading only in-bounds cells avoids IndexError on the right and
            # bottom borders and wrapped (negative-index) reads on the others.
            total += img.getpixel((nx, ny))[0]
            cells += 1
    return cells * 255 - total

经过二值化和降噪后得到的图片

对图片进行水平投影与垂直投影:

# Shared run detection for both projections
def _projection_cuts(counts):
    """Return inclusive ``(start, end)`` index pairs of each non-zero run.

    A run that is still open at the end of ``counts`` is closed at the last
    index, so content touching the image border is not lost.
    """
    cuts = []
    start = None
    for i, count in enumerate(counts):
        if count > 0:
            if start is None:
                start = i
        elif start is not None:
            cuts.append((start, i - 1))
            start = None
    if start is not None:
        cuts.append((start, len(counts) - 1))
    return cuts


# Vertical projection of a binarized image
def vertical(img):
    """Project a binarized image onto the x axis.

    A pixel counts as black when its first channel is 0.

    Returns:
        ``(cuts, t)``: ``cuts`` lists the inclusive ``(left, right)`` column
        range of every run of columns that contain a black pixel, and ``t``
        is the number of such runs.
    """
    pixdata = img.load()
    w, h = img.size
    counts = [
        sum(1 for y in range(h) if pixdata[x, y][0] == 0)
        for x in range(w)
    ]
    cuts = _projection_cuts(counts)
    return cuts, len(cuts)


# Horizontal projection of a binarized image
def horizontal(img):
    """Project a binarized image onto the y axis.

    A pixel counts as black when its first channel is 0.

    Returns:
        ``(cuts, t)``: ``cuts`` lists the inclusive ``(top, bottom)`` row
        range of every run of rows that contain a black pixel, and ``t`` is
        the number of such runs.
    """
    pixdata = img.load()
    w, h = img.size
    counts = [
        sum(1 for x in range(w) if pixdata[x, y][0] == 0)
        for y in range(h)
    ]
    cuts = _projection_cuts(counts)
    return cuts, len(cuts)

这两段代码的主要目的,是通过垂直投影和水平投影得到每个字在水平方向和垂直方向上所占的范围(起止位置)。接下来就是对预处理好的图片文字进行分割。

# Pick an image, preprocess it and cut it into one image per character column
def get_im_path():
    """Let the user choose an image and split it into per-character crops.

    Steps:
      1. Ask for a file with a Tk file dialog; return quietly if cancelled.
      2. Binarize the original image around (mean gray - 16) and show it.
      3. Histogram-equalize the image, show it, binarize it the same way and
         denoise the result.
      4. Take the vertical projection of the directly binarized image to get
         the column range of every character.
      5. Crop those ranges out of the denoised image, show each crop and
         save it as ``c/<index>.png`` (indices start at 1).
    """
    import os  # local import: only needed to create the output directory

    # A hidden throw-away root hosts the dialog; destroy it afterwards so
    # repeated clicks do not accumulate Tk instances.
    dialog_root = tk.Tk()
    dialog_root.withdraw()
    try:
        file_path = filedialog.askopenfilename()
    finally:
        dialog_root.destroy()
    if not file_path:
        # Dialog cancelled: nothing to process.
        return

    # The helpers index pixels as (R, G, B) tuples, so normalise the mode.
    with Image.open(file_path) as source:
        im = source.convert("RGB")

    # Threshold for the untouched image
    th = getthreshold(im) - 16
    print(th)
    # Binarize the original image directly
    im_new1 = binarization(im, th)
    im_new1.show()

    # Histogram equalization
    im1 = his_bal(im)
    im1.show()
    th1 = getthreshold(im1) - 16
    print(th1)
    # Binarize the equalized image
    im_new = binarization(im1, th1)
    # Denoise
    im_new_cn = clear_noise(im_new)
    height = im_new_cn.size[1]
    print(height)

    # Column ranges come from the vertical projection only.
    # NOTE(review): the projection is taken from im_new1 (direct binarization)
    # while the crops come from the equalized + denoised image - confirm that
    # mixing the two is intended.
    v, _ = vertical(im_new1)
    # `right` is the inclusive index of the last black column, whereas the
    # crop box's right bound is exclusive, hence the + 1.
    a = [(left, 0, right + 1, height) for left, right in v]
    print(a)
    im_new.show()  # equalized, then binarized

    # Cut out and save every character
    os.makedirs("c", exist_ok=True)
    for i, n in enumerate(a, 1):
        temp = im_new_cn.crop(n)
        temp.show()
        temp.save("c/%s.png" % i)

至此大概就完成了。

接下来是文件的全部代码:

import os
import queue
import tkinter as tk
from tkinter import filedialog  # file-dialog helpers

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

window = tk.Tk()
window.title('图片选择界面')
window.geometry('400x100')
var = tk.StringVar()


# Pick an image, preprocess it and cut it into one image per character column
def get_im_path():
    """Let the user choose an image and split it into per-character crops.

    Steps:
      1. Ask for a file with a Tk file dialog; return quietly if cancelled.
      2. Binarize the original image around (mean gray - 16) and show it.
      3. Histogram-equalize the image, show it, binarize it the same way and
         denoise the result.
      4. Take the vertical projection of the directly binarized image to get
         the column range of every character.
      5. Crop those ranges out of the denoised image, show each crop and
         save it as ``c/<index>.png`` (indices start at 1).
    """
    # A hidden throw-away root hosts the dialog; destroy it afterwards so
    # repeated clicks do not accumulate Tk instances.
    dialog_root = tk.Tk()
    dialog_root.withdraw()
    try:
        file_path = filedialog.askopenfilename()
    finally:
        dialog_root.destroy()
    if not file_path:
        # Dialog cancelled: nothing to process.
        return

    # The helpers index pixels as (R, G, B) tuples, so normalise the mode.
    with Image.open(file_path) as source:
        im = source.convert("RGB")

    # Threshold for the untouched image
    th = getthreshold(im) - 16
    print(th)
    # Binarize the original image directly
    im_new1 = binarization(im, th)
    im_new1.show()

    # Histogram equalization
    im1 = his_bal(im)
    im1.show()
    th1 = getthreshold(im1) - 16
    print(th1)
    # Binarize the equalized image
    im_new = binarization(im1, th1)
    # Denoise
    im_new_cn = clear_noise(im_new)
    height = im_new_cn.size[1]
    print(height)

    # Column ranges come from the vertical projection only.
    # NOTE(review): the projection is taken from im_new1 (direct binarization)
    # while the crops come from the equalized + denoised image - confirm that
    # mixing the two is intended.
    v, _ = vertical(im_new1)
    # `right` is the inclusive index of the last black column, whereas the
    # crop box's right bound is exclusive, hence the + 1.
    a = [(left, 0, right + 1, height) for left, right in v]
    print(a)
    im_new.show()  # equalized, then binarized

    # Cut out and save every character
    os.makedirs("c", exist_ok=True)
    for i, n in enumerate(a, 1):
        temp = im_new_cn.crop(n)
        temp.show()
        temp.save("c/%s.png" % i)


# Shared run detection for both projections
def _projection_cuts(counts):
    """Return inclusive ``(start, end)`` index pairs of each non-zero run.

    A run that is still open at the end of ``counts`` is closed at the last
    index, so content touching the image border is not lost.
    """
    cuts = []
    start = None
    for i, count in enumerate(counts):
        if count > 0:
            if start is None:
                start = i
        elif start is not None:
            cuts.append((start, i - 1))
            start = None
    if start is not None:
        cuts.append((start, len(counts) - 1))
    return cuts


# Vertical projection of a binarized image
def vertical(img):
    """Project a binarized image onto the x axis.

    A pixel counts as black when its first channel is 0.

    Returns:
        ``(cuts, t)``: ``cuts`` lists the inclusive ``(left, right)`` column
        range of every run of columns that contain a black pixel, and ``t``
        is the number of such runs.
    """
    pixdata = img.load()
    w, h = img.size
    counts = [
        sum(1 for y in range(h) if pixdata[x, y][0] == 0)
        for x in range(w)
    ]
    cuts = _projection_cuts(counts)
    return cuts, len(cuts)


# Horizontal projection of a binarized image
def horizontal(img):
    """Project a binarized image onto the y axis.

    A pixel counts as black when its first channel is 0.

    Returns:
        ``(cuts, t)``: ``cuts`` lists the inclusive ``(top, bottom)`` row
        range of every run of rows that contain a black pixel, and ``t`` is
        the number of such runs.
    """
    pixdata = img.load()
    w, h = img.size
    counts = [
        sum(1 for x in range(w) if pixdata[x, y][0] == 0)
        for y in range(h)
    ]
    cuts = _projection_cuts(counts)
    return cuts, len(cuts)


# Gray level shared by the threshold and equalization steps
def _gray_level(pixel):
    """Return the truncated weighted gray level of an (R, G, B, ...) pixel."""
    return int(0.3 * pixel[0] + 0.59 * pixel[1] + 0.11 * pixel[2])


# Threshold: the mean gray level of the image
def getthreshold(im):
    """Return the mean gray level of ``im``, truncated to an int.

    Each pixel is read once and reduced to its weighted gray level
    ``int(0.3*R + 0.59*G + 0.11*B)``; the levels are averaged over all
    ``width * height`` pixels.
    """
    wid, hei = im.size
    total = 0
    for i in range(wid):
        for j in range(hei):
            total += _gray_level(im.getpixel((i, j)))
    return int(total / (wid * hei))


# Histogram equalization to raise contrast
def his_bal(im):
    """Return a histogram-equalized gray copy of ``im``.

    The gray-level histogram is accumulated into a cumulative distribution,
    which is scaled to 0..255 to form a lookup table.  Every pixel of the
    copy is then replaced by the table entry for its gray level, written to
    all three colour channels.
    """
    im_new = im.copy()
    wid, hei = im.size

    # Gray-level histogram
    hist = [0] * 256
    for i in range(wid):
        for j in range(hei):
            hist[_gray_level(im.getpixel((i, j)))] += 1

    # Cumulative distribution turned into the output lookup table
    new_gray = [0] * 256
    running = 0
    for level in range(256):
        running += hist[level]
        new_gray[level] = int(running / (wid * hei) * 255 + 0.5)

    # Map every pixel through the lookup table
    for i in range(wid):
        for j in range(hei):
            value = new_gray[_gray_level(im.getpixel((i, j)))]
            # Write a gray tuple: a bare int written to a multi-band image is
            # taken as a packed colour value, not as a gray level.
            im_new.putpixel((i, j), (value, value, value))
    return im_new


# Image binarization
def binarization(img, threshold):
    """Return a pure black/white copy of ``img``.

    The weighted luminance ``0.30*R + 0.59*G + 0.11*B`` of every pixel is
    compared with ``threshold``: pixels at or below it become black
    ``(0, 0, 0)``, all others white ``(255, 255, 255)``.  Modes other than
    RGB/RGBA are converted to RGB first, because the loop indexes each pixel
    as an (R, G, B, ...) tuple.  ``img`` itself is never modified.
    """
    if img.mode not in ("RGB", "RGBA"):
        img = img.convert("RGB")

    black, white = (0, 0, 0), (255, 255, 255)
    width, height = img.size
    im_new = img.copy()
    for i in range(width):
        for j in range(height):
            pixel = img.getpixel((i, j))
            luminance = 0.30 * pixel[0] + 0.59 * pixel[1] + 0.11 * pixel[2]
            im_new.putpixel((i, j), black if luminance <= threshold else white)
    return im_new


# Image denoising
def clear_noise(img):
    """Whiten isolated dark pixels of a binarized image, in place.

    Every pixel is scored with ``sum_9_region`` and turned white when the
    score is below 600.  For a strictly black/white image the score is 255
    times the number of black pixels in the 3x3 window (centre included), so
    "below 600" means the black pixel has at most one black neighbour.

    The whole image is scanned, including the last column and the last row;
    that is safe because ``sum_9_region`` only reads in-bounds pixels.  The
    scan mutates ``img`` as it goes, so later windows see pixels that were
    already whitened.

    Returns:
        The same ``img`` object, after displaying it with ``img.show()``.
    """
    width, height = img.width, img.height
    for i in range(width):
        for j in range(height):
            if sum_9_region(img, i, j) < 600:
                # Too few dark neighbours: treat as noise and paint it white.
                img.putpixel((i, j), (255, 255, 255))
    img.show()
    return img


# Darkness score of the 3x3 window around one pixel
def sum_9_region(img, x, y):
    """Score how dark the neighbourhood of pixel ``(x, y)`` is.

    Only the first channel of each pixel is read.

    Returns:
        2550 when the centre pixel's first channel is 255 (white pixels are
        not scored; 2550 exceeds the largest real score of 9 * 255).
        Otherwise ``n * 255 - s`` where ``n`` is the number of window cells
        that lie inside the image (9 in the interior, 6 on an edge, 4 in a
        corner) and ``s`` is the sum of their first-channel values.
    """
    if img.getpixel((x, y))[0] == 255:
        return 2550

    width, height = img.width, img.height
    cells = 0
    total = 0
    for nx in (x - 1, x, x + 1):
        if not 0 <= nx < width:
            continue
        for ny in (y - 1, y, y + 1):
            if not 0 <= ny < height:
                continue
            # Reading only in-bounds cells avoids IndexError on the right and
            # bottom borders and wrapped (negative-index) reads on the others.
            total += img.getpixel((nx, ny))[0]
            cells += 1
    return cells * 255 - total


btn_Open = tk.Button(window,
    text='打开图像',      # label shown on the button
    width=15, height=2,
    command=get_im_path)     # callback run when the button is clicked
btn_Open.pack()

# Run the main window's event loop
window.mainloop()

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持51zixue.net。


Pandas使用stack和pivot实现数据透视的方法
python socket多线程实现客户端与服务器连接
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。