Skip to content

Commit

Permalink
Add VisRes class
Browse files Browse the repository at this point in the history
  • Loading branch information
SWHL committed Sep 20, 2023
1 parent 8806af1 commit 508beba
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 105 deletions.
112 changes: 13 additions & 99 deletions python/demo.py
Original file line number Diff line number Diff line change
@@ -1,110 +1,24 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import math
import random
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

from rapidocr_onnxruntime import RapidOCR

# from rapidocr_openvino import RapidOCR


def draw_ocr_box_txt(image, boxes, txts, font_path, scores=None, text_score=0.5):
    """Draw OCR boxes and recognised text side by side.

    The left half of the result is the input image blended with filled box
    polygons; the right half is a white canvas with box outlines and the
    recognised text rendered inside them.

    Args:
        image: PIL image to annotate (mode "L" is converted to "RGB").
        boxes: Sequence of quadrilaterals, each as four ``(x, y)`` points.
        txts: Recognised text for each box, same order as ``boxes``.
        font_path: Path of the TrueType font used to render the text.
        scores: Optional per-box confidence; boxes scoring below
            ``text_score`` are skipped.
        text_score: Minimum confidence for a box to be drawn.

    Returns:
        ``numpy.ndarray`` holding the ``(h, 2 * w)`` side-by-side image.

    Raises:
        FileNotFoundError: If ``font_path`` does not exist.
    """
    if not Path(font_path).exists():
        raise FileNotFoundError(
            f"The {font_path} does not exists! \n"
            f"Please download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
        )

    def char_height(font, char):
        # ImageFont.getsize() was removed in Pillow 10. The bottom edge of
        # getbbox() is the same value getsize() reported as the height.
        if hasattr(font, "getbbox"):
            return font.getbbox(char)[3]
        return font.getsize(char)[1]

    h, w = image.height, image.width
    if image.mode == "L":
        image = image.convert("RGB")

    img_left = image.copy()
    img_right = Image.new("RGB", (w, h), (255, 255, 255))

    # Fixed seed so the same input always gets the same box colours.
    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        # Use one flat [x0, y0, ..., x3, y3] list of plain numbers for both
        # polygons instead of passing a raw ndarray to Pillow for the fill.
        flat_box = np.array(box).reshape(8).tolist()
        draw_left.polygon(flat_box, fill=color)
        draw_right.polygon(flat_box, outline=color)

        box_height = math.sqrt(
            (box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2
        )
        box_width = math.sqrt(
            (box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2
        )

        if box_height > 2 * box_width:
            # Tall, narrow box: render the text vertically, one char per row.
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            cur_y = box[0][1]
            for c in txt:
                draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_height(font, c)
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def visualize(image_path, result, font_path="resources/fonts/FZYTK.TTF"):
image = Image.open(image_path)
boxes, txts, scores = list(zip(*result))

draw_img = draw_ocr_box_txt(
image, np.array(boxes), txts, font_path, scores, text_score=0.5
)
from rapidocr_onnxruntime import RapidOCR, VisRes

draw_img_save = Path("./inference_results/")
if not draw_img_save.exists():
draw_img_save.mkdir(parents=True, exist_ok=True)
# from rapidocr_openvino import RapidOCR, VisRes

image_save = str(draw_img_save / f"infer_{Path(image_path).name}")
cv2.imwrite(image_save, draw_img[:, :, ::-1])
print(f"The infer result has saved in {image_save}")

rapid_ocr = RapidOCR()
vis = VisRes(font_path="resources/fonts/FZYTK.TTF")

if __name__ == "__main__":
rapid_ocr = RapidOCR()
image_path = "tests/test_files/ch_en_num.jpg"
with open(image_path, "rb") as f:
img = f.read()

image_path = "tests/test_files/ch_en_num.jpg"
with open(image_path, "rb") as f:
img = f.read()
result, elapse_list = rapid_ocr(img)
print(result)
print(elapse_list)
result, elapse_list = rapid_ocr(img)
print(result)
print(elapse_list)

if result:
visualize(image_path, result, font_path="resources/fonts/FZYTK.TTF")
boxes, txts, scores = list(zip(*result))
res = vis(img, boxes, txts, scores)
cv2.imwrite("vis.png", res)
2 changes: 1 addition & 1 deletion python/rapidocr_onnxruntime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# @Author: SWHL
# @Contact: [email protected]
from .main import RapidOCR
from .utils import LoadImageError
from .utils import LoadImageError, VisRes
113 changes: 111 additions & 2 deletions python/rapidocr_onnxruntime/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# @Author: SWHL
# @Contact: [email protected]
import argparse
import math
import random
import traceback
import warnings
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
Expand All @@ -18,7 +20,7 @@
get_available_providers,
get_device,
)
from PIL import Image, UnidentifiedImageError
from PIL import Image, ImageDraw, ImageFont, UnidentifiedImageError

root_dir = Path(__file__).resolve().parent
InputType = Union[str, np.ndarray, bytes, Path]
Expand Down Expand Up @@ -344,3 +346,110 @@ def remove_prefix(
k = k.split(prefix)[1]
new_rec_dict[k] = v
return new_rec_dict


class VisRes:
    """Visualise OCR results on top of the source image.

    Calling an instance with only detection boxes draws numbered boxes on the
    image. Calling it with boxes and texts produces a side-by-side image:
    the blended, colour-filled boxes on the left and the recognised text on a
    white canvas on the right.
    """

    def __init__(
        self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5
    ):
        """Store the rendering configuration.

        Args:
            font_path: TrueType font used to render text. Required.
            text_score: Boxes whose score is below this value are not drawn
                by ``draw_ocr_box_txt``.

        Raises:
            FileNotFoundError: If ``font_path`` is ``None``.
        """
        if font_path is None:
            # No path was given at all, so say that rather than reporting
            # that a file called "None" does not exist.
            raise FileNotFoundError(
                "The font_path is not specified! \n"
                "You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
            )

        self.font_path = str(font_path)
        self.text_score = text_score
        self.load_img = LoadImage()

    def __call__(
        self,
        img_content: "InputType",
        dt_boxes: np.ndarray,
        txts: Optional[Union[List[str], Tuple[str]]] = None,
        scores: Optional[Tuple[float]] = None,
    ) -> np.ndarray:
        """Load the image and draw the OCR result on it.

        Args:
            img_content: Anything ``LoadImage`` accepts.
            dt_boxes: Detected quadrilaterals, four ``(x, y)`` points each.
            txts: Recognised text per box. When ``None`` only the numbered
                boxes are drawn.
            scores: Optional confidence per box, used to filter boxes when
                text is drawn.

        Returns:
            The annotated image as a ``numpy.ndarray``.
        """
        img = self.load_img(img_content)
        img = Image.fromarray(img)

        # Text is what selects the side-by-side rendering. Without it there
        # is nothing to zip the boxes against, so fall back to boxes only
        # even if scores were supplied.
        if txts is None:
            return self.draw_dt_boxes(img, dt_boxes)

        return self.draw_ocr_box_txt(img, dt_boxes, txts, scores)

    def draw_dt_boxes(self, img: "Image.Image", dt_boxes: np.ndarray) -> np.ndarray:
        """Fill each box with a random colour, outline it and label it with its index."""
        img_temp = img.copy()
        draw_img = ImageDraw.Draw(img_temp)
        for idx, box in enumerate(dt_boxes):
            flat_box = self._flatten_box(box)
            draw_img.polygon(flat_box, fill=self.get_random_color())

            box_height = self.get_box_height(box)
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
            draw_img.polygon(flat_box, outline=(0, 0, 0))
            draw_img.text([box[0][0], box[0][1]], str(idx), fill=(0, 0, 0), font=font)
        return np.array(img_temp)

    def draw_ocr_box_txt(self, image: "Image.Image", boxes, txts, scores=None):
        """Return a side-by-side image of blended boxes (left) and text (right).

        Boxes whose score is below ``self.text_score`` are skipped when
        ``scores`` is given.
        """
        h, w = image.height, image.width
        if image.mode == "L":
            image = image.convert("RGB")

        img_left = image.copy()
        img_right = Image.new("RGB", (w, h), (255, 255, 255))

        # Fixed seed so the same input always gets the same box colours.
        random.seed(0)
        draw_left = ImageDraw.Draw(img_left)
        draw_right = ImageDraw.Draw(img_right)
        for idx, (box, txt) in enumerate(zip(boxes, txts)):
            if scores is not None and float(scores[idx]) < self.text_score:
                continue

            color = self.get_random_color()
            flat_box = self._flatten_box(box)
            draw_left.polygon(flat_box, fill=color)
            draw_right.polygon(flat_box, outline=color)

            box_height = self.get_box_height(box)
            box_width = self.get_box_width(box)
            if box_height > 2 * box_width:
                # Tall, narrow box: render the text vertically, one
                # character per row.
                font_size = max(int(box_width * 0.9), 10)
                font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
                cur_y = box[0][1]
                for c in txt:
                    draw_right.text(
                        (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font
                    )
                    cur_y += self._get_char_height(font, c)
            else:
                font_size = max(int(box_height * 0.8), 10)
                font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
                draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

        img_left = Image.blend(image, img_left, 0.5)
        img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
        img_show.paste(img_left, (0, 0, w, h))
        img_show.paste(img_right, (w, 0, w * 2, h))
        return np.array(img_show)

    @staticmethod
    def _flatten_box(box) -> List[float]:
        """Return the four box points as a flat ``[x0, y0, ..., x3, y3]`` list."""
        return np.array(box).reshape(8).tolist()

    @staticmethod
    def _get_char_height(font, char: str):
        """Return the vertical advance used when stacking ``char`` vertically.

        ``ImageFont.getsize`` was removed in Pillow 10. The bottom edge of
        ``getbbox`` is the same value ``getsize`` reported as the height, so
        it is preferred whenever it is available.
        """
        if hasattr(font, "getbbox"):
            return font.getbbox(char)[3]
        return font.getsize(char)[1]

    @staticmethod
    def get_random_color():
        """Return a random ``(r, g, b)`` tuple with each channel in 0..255."""
        return (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )

    @staticmethod
    def get_box_height(box: List[List[float]]) -> float:
        """Return the distance between the first and fourth box points."""
        return math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)

    @staticmethod
    def get_box_width(box: List[List[float]]) -> float:
        """Return the distance between the first and second box points."""
        return math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
2 changes: 1 addition & 1 deletion python/rapidocr_openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# @Author: SWHL
# @Contact: [email protected]
from .main import RapidOCR
from .utils import LoadImageError
from .utils import LoadImageError, VisRes
Loading

0 comments on commit 508beba

Please sign in to comment.