pip install paddlepaddle
git clone https://github.com/PaddlePaddle/PaddleOCR
cd PaddleOCR
#查看需求文件,安装需求
pip3 install -r requirements.txt
在安装过程中,会出现gcc的问题
Building wheels for collected packages: python-Levenshtein, bce-python-sdk, futureBuilding wheel for python-Levenshtein (setup.py) ... errorERROR: Command errored out with exit status 1:command: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-1ewi0ck6/ python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4f b56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from se tuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"' "'))' bdist_wheel -d /tmp/pip-wheel-knx85gsscwd: /tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/Complete output (31 lines):running bdist_wheelrunning buildrunning build_pycreating buildcreating build/lib.linux-x86_64-3.8creating build/lib.linux-x86_64-3.8/Levenshteincopying Levenshtein/StringMatcher.py -> build/lib.linux-x86_64-3.8/Levenshteincopying Levenshtein/__init__.py -> build/lib.linux-x86_64-3.8/Levenshteinrunning egg_infowriting python_Levenshtein.egg-info/PKG-INFOwriting dependency_links to python_Levenshtein.egg-info/dependency_links.txtdeleting python_Levenshtein.egg-info/entry_points.txtwriting namespace_packages to python_Levenshtein.egg-info/namespace_packages.txtwriting requirements to python_Levenshtein.egg-info/requires.txtwriting top-level names to python_Levenshtein.egg-info/top_level.txtreading manifest file 'python_Levenshtein.egg-info/SOURCES.txt'reading manifest template 'MANIFEST.in'warning: no previously-included files matching '*pyc' found anywhere in distributionwarning: no previously-included files matching '*so' found anywhere in distributionwarning: no previously-included files matching '.project' found anywhere in distributionwarning: no previously-included files 
matching '.pydevproject' found anywhere in distributionadding license file 'COPYING'writing manifest file 'python_Levenshtein.egg-info/SOURCES.txt'copying Levenshtein/_levenshtein.c -> build/lib.linux-x86_64-3.8/Levenshteincopying Levenshtein/_levenshtein.h -> build/lib.linux-x86_64-3.8/Levenshteinrunning build_extbuilding 'Levenshtein._levenshtein' extensioncreating build/temp.linux-x86_64-3.8creating build/temp.linux-x86_64-3.8/Levenshteingcc -pthread -B /root/anaconda3/envs/ocr/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -f PIC -I/root/anaconda3/envs/ocr/include/python3.8 -c Levenshtein/_levenshtein.c -o build/temp.linux-x86_64-3.8/Levenshtein/_levenshtein.oerror: command 'gcc' failed: No such file or directory----------------------------------------ERROR: Failed building wheel for python-LevenshteinRunning setup.py clean for python-LevenshteinBuilding wheel for bce-python-sdk (setup.py) ... doneCreated wheel for bce-python-sdk: filename=bce_python_sdk-0.8.64-py3-none-any.whl size=202973 sha256=4c692a466b1f9b9edcb8d0d615bc81164604616 3889fbb3d83a15f08c2d1ecfcStored in directory: /root/.cache/pip/wheels/88/12/83/e1691769d9552209d668e0db7ee723e110af3eda7e5a7a3a5cBuilding wheel for future (setup.py) ... doneCreated wheel for future: filename=future-0.18.2-py3-none-any.whl size=491070 sha256=066cfa308e6947f08415f3e40c604f7ce166266c06ab81079f50f6c d2d2ebde3Stored in directory: /root/.cache/pip/wheels/1b/3e/31/72653079400d50aff1c3492982a6965994629072cad3b97720
Successfully built bce-python-sdk future
Failed to build python-Levenshtein
Installing collected packages: pytz, pyparsing, platformdirs, filelock, distlib, virtualenv, toml, tifffile, scipy, pyyaml, PyWavelets, python -dateutil, pyflakes, pycryptodome, pycodestyle, packaging, nodeenv, networkx, mccabe, kiwisolver, imageio, identify, future, fonttools, cycler , cfgv, Babel, shellcheck-py, shapely, scikit-image, pre-commit, pandas, opencv-python, matplotlib, lxml, Flask-Babel, flake8, et-xmlfile, css utils, cssselect, cachetools, bce-python-sdk, visualdl, tqdm, python-Levenshtein, pyclipper, premailer, openpyxl, opencv-contrib-python, lmdb, imgaug, cython, attrdictRunning setup.py install for python-Levenshtein ... errorERROR: Command errored out with exit status 1:command: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-1ewi0ck 6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e 4fb56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec' "'"'))' install --record /tmp/pip-record-ywrynebe/install-record.txt --single-version-externally-managed --compile --install-headers /root/ana conda3/envs/ocr/include/python3.8/python-Levenshteincwd: /tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/Complete output (32 lines):running install/root/anaconda3/envs/ocr/lib/python3.8/site-packages/setuptools/command/install.py:34: SetuptoolsDeprecationWarning: setup.py install is d eprecated. 
Use build and pip and other standards-based tools.warnings.warn(running buildrunning build_pycreating buildcreating build/lib.linux-x86_64-3.8creating build/lib.linux-x86_64-3.8/Levenshteincopying Levenshtein/StringMatcher.py -> build/lib.linux-x86_64-3.8/Levenshteincopying Levenshtein/__init__.py -> build/lib.linux-x86_64-3.8/Levenshteinrunning egg_infowriting python_Levenshtein.egg-info/PKG-INFOwriting dependency_links to python_Levenshtein.egg-info/dependency_links.txtwriting namespace_packages to python_Levenshtein.egg-info/namespace_packages.txtwriting requirements to python_Levenshtein.egg-info/requires.txtwriting top-level names to python_Levenshtein.egg-info/top_level.txtreading manifest file 'python_Levenshtein.egg-info/SOURCES.txt'reading manifest template 'MANIFEST.in'warning: no previously-included files matching '*pyc' found anywhere in distributionwarning: no previously-included files matching '*so' found anywhere in distributionwarning: no previously-included files matching '.project' found anywhere in distributionwarning: no previously-included files matching '.pydevproject' found anywhere in distributionadding license file 'COPYING'writing manifest file 'python_Levenshtein.egg-info/SOURCES.txt'copying Levenshtein/_levenshtein.c -> build/lib.linux-x86_64-3.8/Levenshteincopying Levenshtein/_levenshtein.h -> build/lib.linux-x86_64-3.8/Levenshteinrunning build_extbuilding 'Levenshtein._levenshtein' extensioncreating build/temp.linux-x86_64-3.8creating build/temp.linux-x86_64-3.8/Levenshteingcc -pthread -B /root/anaconda3/envs/ocr/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/root/anaconda3/envs/ocr/include/python3.8 -c Levenshtein/_levenshtein.c -o build/temp.linux-x86_64-3.8/Levenshtein/_levenshtein.oerror: command 'gcc' failed: No such file or directory----------------------------------------
ERROR: Command errored out with exit status 1: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0 ] = '"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/ python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__fi le__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(c ompile(code, __file__, '"'"'exec'"'"'))' install --record /tmp/pip-record-ywrynebe/install-record.txt --single-version-externally-managed --co mpile --install-headers /root/anaconda3/envs/ocr/include/python3.8/python-Levenshtein Check the logs for full command output.
然后安装gcc的过程中,又发现如下问题:
(ocr) root@spider:~/apps/ocr/PaddleOCR# apt-get build-dep gcc
Reading package lists... Done
Picking 'gcc-defaults' as source package instead of 'gcc'
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Some packages could not be installed. This may mean that you have
requested an impossible situation or if you are using the unstable
distribution that some required packages have not yet been created
or been moved out of Incoming.
The following information may help to resolve the situation:

The following packages have unmet dependencies:
 g++ : Depends: cpp (= 4:9.3.0-1ubuntu2) but 4:11.2.0-1ubuntu1 is to be installed
 gcc : Depends: cpp (= 4:9.3.0-1ubuntu2) but 4:11.2.0-1ubuntu1 is to be installed
 libc6-dev : Depends: libc6 (= 2.31-0ubuntu9.9) but 2.35-0ubuntu3 is to be installed
             Depends: libc-dev-bin (= 2.31-0ubuntu9.9)
             Depends: libcrypt-dev but it is not going to be installed
E: Unable to correct problems, you have held broken packages.
使用lsb_release -a查看系统代号:
(base) root@spider:~/apps# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 22.04 LTS
Release: 22.04
Codename: jammy
(base) root@spider:~/apps# vim /etc/apt/sources.list
deb http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
参照:https://blog.csdn.net/weixin_43894075/article/details/115141599
将sources.list中的focal全部改为jammy(与系统代号一致),然后执行 apt update && apt upgrade
然后再次安装gcc即可
然后出现各种问题:
直到把paddle版本降低到2.0.0rc1,问题就全部解决了
python -m pip install paddlepaddle==2.0.0rc1
另外,gcc版本可能有问题,需要安装低版本的gcc环境
PaddlePaddle最高支持gcc8,而Ubuntu22.04的gcc版本是11.2.0,可能存在不兼容问题 而Ubuntu20.04,gcc版本为 9.4.0, 已验证可以正常安装paddlepaddle-gpu
apt install gcc-9 g++-9
apt install gcc-11 g++-11
参见:https://blog.csdn.net/zhqh100/article/details/124410399
(ocr) root@spider:~/apps/ocr# dpkg -l | grep gcc
ii gcc 4:11.2.0-1ubuntu1 amd64 GNU C compiler
ii gcc-10-base:amd64 10.3.0-15ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii gcc-11 11.2.0-19ubuntu1 amd64 GNU C compiler
ii gcc-11-base:amd64 11.2.0-19ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii gcc-12-base:amd64 12-20220319-1ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii gcc-9 9.4.0-5ubuntu1 amd64 GNU C compiler
ii gcc-9-base:amd64 9.4.0-5ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii libgcc-11-dev:amd64 11.2.0-19ubuntu1 amd64 GCC support library (development files)
ii libgcc-9-dev:amd64 9.4.0-5ubuntu1 amd64 GCC support library (development files)
ii libgcc-s1:amd64 12-20220319-1ubuntu1 amd64 GCC support library
ii libuno-cppuhelpergcc3-3 1:7.3.3-0ubuntu0.22.04.1 amd64 LibreOffice UNO runtime environment -- CPPU helper library
ii libuno-purpenvhelpergcc3-3 1:7.3.3-0ubuntu0.22.04.1 amd64 LibreOffice UNO runtime environment -- "purpose environment" helper
ii libuno-salhelpergcc3-3 1:7.3.3-0ubuntu0.22.04.1 amd64 LibreOffice UNO runtime environment -- SAL helpers for C++ library
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 30 --slave /usr/bin/g++ g++ /usr/bin/g++-9
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 20 --slave /usr/bin/g++ g++ /usr/bin/g++-11
#然后手工切换下gcc
(base) root@spider:~/apps/ocr# update-alternatives --config gcc
There are 2 choices for the alternative gcc (providing /usr/bin/gcc).

  Selection    Path             Priority   Status
------------------------------------------------------------
  0            /usr/bin/gcc-9    30        auto mode
* 1            /usr/bin/gcc-11   20        manual mode
  2            /usr/bin/gcc-9    30        manual mode

Press <enter> to keep the current choice[*], or type selection number: 0
update-alternatives: using /usr/bin/gcc-9 to provide /usr/bin/gcc (gcc) in auto mode
(base) root@spider:~/apps/ocr# gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 9.4.0-5ubuntu1' --with-bugurl=file:///usr/share/doc/gcc-9/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,gm2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-9 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-9-bVKGhJ/gcc-9-9.4.0/debian/tmp-nvptx/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 9.4.0 (Ubuntu 9.4.0-5ubuntu1)
然后启动python服务
(base) root@spider:~/apps/ocr# cat server.py
#!/usr/bin/python
import base64
from flask import Flask,jsonify,request,abort
from ocr_utils import *
import json
import numpy as np
import logging
from logging.handlers import RotatingFileHandler
from threading import Thread
import time

# Flask application object; the route decorators below register on it.
app = Flask(__name__)


@app.route('/', methods=['GET', 'POST'])
def home():
    """Return a fixed greeting for GET/POST requests to the root URL."""
    # NOTE(review): in the pasted source this literal spans a line break
    # (possibly stripped HTML markup); reproduced here as a trailing
    # newline -- confirm against the real file.
    return 'Home\n'


@app.route('/ocr', methods=['POST'])
def ocr():
    """Run OCR on the image named in the JSON request body.

    The request body must be a JSON object with the keys ``path`` and
    ``image``; both are passed unchanged to ``image_ocr``.

    Returns:
        On success, a ``(body, 200, headers)`` tuple whose body is the JSON
        string built by ``parseOcrResult``. If the request cannot be read or
        a key is missing, the plain string ``'Bad request param .\\n'``.
    """
    try:
        app.logger.info(request.headers)
        payload = request.json
        app.logger.info(type(payload))
        app.logger.info(payload)
        path = payload['path']
        image = payload['image']
        app.logger.info(path)
        app.logger.info(image)
        app.logger.info(request.data)
    except Exception:
        # Catch Exception rather than BaseException so KeyboardInterrupt and
        # SystemExit still propagate; logger.exception keeps the traceback.
        app.logger.exception("发生了异常")
        # NOTE(review): this is sent with Flask's default 200 status, as in
        # the original; a 400 would be more accurate if no client relies on it.
        return 'Bad request param .\n'

    # NOTE(review): path and image come straight from the client and are used
    # by image_ocr to build file-system paths -- validate before exposing
    # this service beyond a trusted network.
    ocrResult = image_ocr(path, image)
    app.logger.info("ocrResult....................................................")
    app.logger.info(ocrResult)
    # Convert the OCR result into a JSON string for the response body.
    response = parseOcrResult(ocrResult)
    return response, 200, {"Content-Type": "application/json"}


def parseOcrResult(ocrResult):
    """Build the JSON response string from an OCR result.

    Reads ``ocrResult[0][1]`` and takes its first two items as the text and
    the score. An empty or ``None`` result yields an empty text and a score
    of 0.0.

    Args:
        ocrResult: value returned by ``image_ocr``.

    Returns:
        JSON string with the keys ``text`` and ``score``; non-ASCII
        characters are written unescaped.
    """
    text = ""
    score = "0"
    if ocrResult is not None and len(ocrResult) > 0:
        extractResult = ocrResult[0][1]
        text = extractResult[0]
        score = extractResult[1]
    # Builtin float() replaces np.float, which was removed in NumPy 1.24.
    data = {"text": text, "score": float(score)}
    return json.dumps(data, ensure_ascii=False)


if __name__ == '__main__':
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
    # interactive Werkzeug debugger on every interface; development only.
    app.run(host='0.0.0.0', port=30003, debug=True)
(base) root@spider:~/apps/ocr# cat ocr_utils.py
# !usr/bin/env python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------File Name&#xff1a; g_ocrDescription :Author : yangstdate&#xff1a; 2022/1/25
-------------------------------------------------Change Activity:2022/1/25:
-------------------------------------------------
"""from PIL import Image
from paddleocr import PaddleOCR# 加载ocr模型
ocr &#61; PaddleOCR(use_angle_cls&#61;True, lang&#61;"ch", cls_thresh&#61;0.1, det_db_box_thresh&#61;0.1)def image_ocr(path,imageName):"""针对图片进行处理1. 读取待识别图片2. 根据待识别图片的大小判断是否粘贴到bg.png3. 生成待识别图片对应的新的图片:param image_path: 待识别图片路径:return:"""bakImg&#61;"/root/apps/ocr/bak/"&#43;imageNamebaseheight&#61;500img &#61; Image.open(path&#43;"/"&#43;imageName) # 加载图片pixels &#61; img.load()for y in range(img.size[1]): # 透明转白色背景&#xff08;如果是透明图片&#xff0c;白色字体需要另外处理&#xff09;for x in range(img.size[0]):if pixels[x, y][3] < 255:pixels[x, y] &#61; (255, 255, 255, 255)bg &#61; Image.open("bg2.png")bg.paste(img,(50,30)) # 复制到背景图w,h &#61; bg.sizeprint(&#39;img_size:&#39;, h, w)hpercent &#61; (baseheight / float(h))wsize &#61; int((float(w) * float(hpercent)))bg &#61; bg.resize((wsize, baseheight), Image.ANTIALIAS)bg.save(bakImg)return ocr.ocr(bakImg)
调用服务
(base) root@spider:~/apps/ocr# curl --location --request POST 'http://127.0.0.1:30003/ocr' --header 'Content-Type: application/json' --data '{"path": "/root/apps/ocr","image": "xxxx.png"
}'
{"text": "一个文字的图片", "score": 0.9016667604446411}
(base) root@spider:~/apps/ocr#