Skip to content

Commit

Permalink
fix(rapidocr_web): Fixed issue #197
Browse files (browse the repository at this point in the history)
  • Loading branch information
SWHL committed Jul 8, 2024
1 parent df88d9c commit d2f3692
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 56 deletions.
2 changes: 1 addition & 1 deletion ocrweb/rapidocr_web/ocrweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def main():
parser.add_argument("-p", "--port", type=int, default=9003, help="IP port")
args = parser.parse_args()

print(f"Successfully launched and visit https://{args.ip}:{args.port} to view.")
print(f"Successfully launched and visit http://{args.ip}:{args.port} to view.")
server = make_server(args.ip, args.port, app)
server.serve_forever()

Expand Down
57 changes: 33 additions & 24 deletions ocrweb/rapidocr_web/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,69 +13,78 @@
from rapidocr_onnxruntime import RapidOCR


class OCRWebUtils():
class OCRWebUtils:
def __init__(self) -> None:
self.ocr = RapidOCR()
self.WebReturn = namedtuple(
'WebReturn',
['image', 'total_elapse', 'elapse_part', 'rec_res', 'det_boxes'])
"WebReturn",
["image", "total_elapse", "elapse_part", "rec_res", "det_boxes"],
)

def __call__(self, img_content: str) -> namedtuple:
if img_content is None:
raise ValueError('img is None')
raise ValueError("img is None")
img = self.prepare_img(img_content)
ocr_res, elapse = self.ocr(img)
return self.get_web_result(img, ocr_res, elapse)

def prepare_img(self, img_str: str) -> np.ndarray:
img_str = img_str.split(',')[1]
image = base64.b64decode(img_str + '=' * (-len(img_str) % 4))
img_str = img_str.split(",")[1]
image = base64.b64decode(img_str + "=" * (-len(img_str) % 4))
nparr = np.frombuffer(image, np.uint8)
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
if image.ndim == 2:
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
return image

def get_web_result(self,
img: np.ndarray,
ocr_res: List,
elapse: List) -> Tuple[Union[str, List, str, str]]:
def get_web_result(
self, img: np.ndarray, ocr_res: List, elapse: List
) -> Tuple[Union[str, List, str, str]]:
if ocr_res is None:
total_elapse, elapse_part = 0, ''
total_elapse, elapse_part = 0, ""
img_str = self.img_to_base64(img)
rec_res = json.dumps([], indent=2, ensure_ascii=False)
boxes = ''
boxes = ""
else:
boxes, txts, scores = list(zip(*ocr_res))
scores = [f"{v:.4f}" for v in scores]
rec_res = list(zip(range(len(txts)), txts, scores))
rec_res = json.dumps(rec_res, indent=2, ensure_ascii=False)

det_im = self.draw_text_det_res(np.array(boxes), img)
img_str = self.img_to_base64(det_im)

total_elapse = reduce(lambda x, y: float(x)+float(y), elapse)
elapse_part = ','.join([f'{x:.4f}' for x in elapse])
total_elapse = reduce(lambda x, y: float(x) + float(y), elapse)
elapse_part = ",".join([f"{x:.4f}" for x in elapse])

web_return = self.WebReturn(image=img_str,
total_elapse=f'{total_elapse:.4f}',
elapse_part=elapse_part,
rec_res=rec_res,
det_boxes=boxes)
web_return = self.WebReturn(
image=img_str,
total_elapse=f"{total_elapse:.4f}",
elapse_part=elapse_part,
rec_res=rec_res,
det_boxes=boxes,
)
return json.dumps(web_return._asdict())

@staticmethod
def img_to_base64(img) -> str:
img = cv2.imencode('.png', img)[1]
img = cv2.imencode(".png", img)[1]
img_str = str(base64.b64encode(img))[2:-1]
return img_str

@staticmethod
def draw_text_det_res(dt_boxes: np.ndarray,
raw_im: np.ndarray) -> np.ndarray:
def draw_text_det_res(dt_boxes: np.ndarray, raw_im: np.ndarray) -> np.ndarray:
src_im = copy.deepcopy(raw_im)
for i, box in enumerate(dt_boxes):
box = np.array(box).astype(np.int32).reshape(-1, 2)
cv2.polylines(src_im, [box], True, color=(0, 0, 255), thickness=1)
cv2.putText(src_im, str(i), (int(box[0][0]), int(box[0][1])),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
cv2.putText(
src_im,
str(i),
(int(box[0][0]), int(box[0][1])),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 0),
2,
)
return src_im
6 changes: 3 additions & 3 deletions ocrweb/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Pillow
Pillow<=10.0.0
requests
Flask>=2.1.0
rapidocr_onnxruntime
Flask>=2.1.0, <=3.0.0
rapidocr_onnxruntime>=1.3.0,<=2.0.0
get_pypi_latest_version
wheel
59 changes: 31 additions & 28 deletions ocrweb/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@

def get_readme():
root_dir = Path(__file__).resolve().parent.parent
readme_path = str(root_dir / 'docs' / 'doc_whl_rapidocr_web.md')
with open(readme_path, 'r', encoding='utf-8') as f:
readme_path = str(root_dir / "docs" / "doc_whl_rapidocr_web.md")
with open(readme_path, "r", encoding="utf-8") as f:
readme = f.read()
return readme


MODULE_NAME = 'rapidocr_web'
MODULE_NAME = "rapidocr_web"

obtainer = GetPyPiLatestVersion()
latest_version = obtainer(MODULE_NAME)
VERSION_NUM = obtainer.version_add_one(latest_version)

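# Prefer the semantic version number extracted from the commit message; if there is none, automatically increment the latest version by 1.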
if len(sys.argv) > 2:
match_str = ' '.join(sys.argv[2:])
match_str = " ".join(sys.argv[2:])
matched_versions = obtainer.extract_version(match_str)
if matched_versions:
VERSION_NUM = matched_versions
Expand All @@ -33,37 +33,40 @@ def get_readme():
setuptools.setup(
name=MODULE_NAME,
version=VERSION_NUM,
platforms='Any',
description='A cross platform OCR Library based on OnnxRuntime.',
platforms="Any",
description="A cross platform OCR Library based on OnnxRuntime.",
long_description=get_readme(),
long_description_content_type='text/markdown',
author='SWHL',
author_email='[email protected]',
url='https://github.com/RapidAI/RapidOCR',
download_url='https://github.com/RapidAI/RapidOCR.git',
license='Apache-2.0',
long_description_content_type="text/markdown",
author="SWHL",
author_email="[email protected]",
url="https://github.com/RapidAI/RapidOCR",
download_url="https://github.com/RapidAI/RapidOCR.git",
license="Apache-2.0",
include_package_data=True,
install_requires=['requests', 'Flask>=2.1.0', 'rapidocr_onnxruntime'],
packages=[MODULE_NAME,
f'{MODULE_NAME}.static.css',
f'{MODULE_NAME}.static.js',
f'{MODULE_NAME}.templates'],
package_data={'': ['*.ico', '*.css', '*.js', '*.html']},
install_requires=["requests", "Flask>=2.1.0", "rapidocr_onnxruntime"],
packages=[
MODULE_NAME,
f"{MODULE_NAME}.static.css",
f"{MODULE_NAME}.static.js",
f"{MODULE_NAME}.templates",
],
package_data={"": ["*.ico", "*.css", "*.js", "*.html"]},
keywords=[
'ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr'
"ocr,text_detection,text_recognition,db,onnxruntime,paddleocr,openvino,rapidocr"
],
classifiers=[
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
],
python_requires='>=3.6,<3.12',
python_requires=">=3.6,<3.13",
entry_points={
'console_scripts': [
f'{MODULE_NAME}={MODULE_NAME}.ocrweb:main',
"console_scripts": [
f"{MODULE_NAME}={MODULE_NAME}.ocrweb:main",
],
},
)

0 comments on commit d2f3692

Please sign in to comment.