I tried to extract text from the image
Create a resource for the Computer Vision API
Four. Please install the required libraries.
pip install matplotlib
pip install pillow
pip install opencv-python
pip install --upgrade azure-cognitiveservices-vision-computervision
Five. Enter the key and endpoint you wrote down and run the following code!
subscription_key = "<your subscription key>"
endpoint = "<your API endpoint>"
The endpoint seems to work even if you specify the region.
endpoint = "https://<your region>.api.cognitive.microsoft.com/"
Quick Start: Extract printed and handwritten text using Computer Vision's REST API and Python (https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision) / quickstarts / python-hand-text)
import json
import os
import os.path
import sys
import requests
import time
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from PIL import Image
from io import BytesIO
import cv2
subscription_key = "<your subscription key>"
endpoint = "<your API endpoint>"
endpoint = "https://japanwest.api.cognitive.microsoft.com/"
text_recognition_url = endpoint + "vision/v3.1/read/analyze"
image_url = "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/master/articles/cognitive-services/Computer-vision/Images/readsample.jpg "
headers = {'Ocp-Apim-Subscription-Key': subscription_key}
data = {'url': image_url}
response = requests.post(text_recognition_url, headers=headers, json=data)
response.raise_for_status()
operation_url = response.headers["Operation-Location"]
analysis = {}
poll = True
while (poll):
response_final = requests.get(response.headers["Operation-Location"], headers=headers)
analysis = response_final.json()
print(json.dumps(analysis, indent=4))
time.sleep(1)
if ("analyzeResult" in analysis):
poll = False
if ("status" in analysis and analysis['status'] == 'failed'):
poll = False
polygons = []
if ("analyzeResult" in analysis):
polygons = [(line["boundingBox"], line["text"])
for line in analysis["analyzeResult"]["readResults"][0]["lines"]]
image = Image.open(BytesIO(requests.get(image_url).content))
ax = plt.imshow(image)
for polygon in polygons:
vertices = [(polygon[0][i], polygon[0][i+1])
for i in range(0, len(polygon[0]), 2)]
text = polygon[1]
patch = Polygon(vertices, closed=True, fill=False, linewidth=2, color='y')
ax.axes.add_patch(patch)
plt.text(vertices[0][0], vertices[0][1], text, fontsize=20, va="top")
plt.show()
input | output |
---|---|
import json
import os
import os.path
import sys
import requests
import time
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from PIL import Image
from io import BytesIO
import cv2
subscription_key = "<your subscription key>"
endpoint = "<your API endpoint>"
endpoint = "https://japanwest.api.cognitive.microsoft.com/"
text_recognition_url = endpoint + "vision/v3.1/read/analyze"
headers = {'Ocp-Apim-Subscription-Key': subscription_key, 'Content-Type': 'application/octet-stream'}
filename = "readsample.jpg "
root, ext = os.path.splitext(filename)
image_data = open(filename, "rb").read()
color = cv2.imread(filename, cv2.IMREAD_COLOR)
cv2.namedWindow("color", cv2.WINDOW_NORMAL)
cv2.imshow("color", color)
cv2.waitKey(1)
image_data = cv2.imencode(ext, color)[1].tostring()
response = requests.post(text_recognition_url, headers=headers, data=image_data)
response.raise_for_status()
operation_url = response.headers["Operation-Location"]
analysis = {}
poll = True
while (poll):
response_final = requests.get(
response.headers["Operation-Location"], headers=headers)
analysis = response_final.json()
print(json.dumps(analysis, indent=4))
time.sleep(1)
if ("analyzeResult" in analysis):
poll = False
if ("status" in analysis and analysis['status'] == 'failed'):
poll = False
polygons = []
if ("analyzeResult" in analysis):
polygons = [(line["boundingBox"], line["text"])
for line in analysis["analyzeResult"]["readResults"][0]["lines"]]
image = Image.open(BytesIO(image_data))
image = Image.fromarray(color)
ax = plt.imshow(image)
for polygon in polygons:
vertices = [(polygon[0][i], polygon[0][i+1])
for i in range(0, len(polygon[0]), 2)]
text = polygon[1]
patch = Polygon(vertices, closed=True, fill=False, linewidth=2, color='y')
ax.axes.add_patch(patch)
plt.text(vertices[0][0], vertices[0][1], text, fontsize=20, va="top")
plt.show()
input | output |
---|---|
Quick Start: Use Computer Vision Client Library (https://docs.microsoft.com/en-us/azure/cognitive-services/Computer-vision/quickstarts-sdk/client-library?pivots=programming-language -python & tabs = visual-studio)
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from array import array
import os
from PIL import Image
import sys
import time
import cv2
from io import BytesIO
subscription_key = "<your subscription key>"
endpoint = "<your API endpoint>"
endpoint = "https://japanwest.api.cognitive.microsoft.com/"
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
print("===== Batch Read File - remote =====")
remote_image_handw_text_url = "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/master/articles/cognitive-services/Computer-vision/Images/readsample.jpg "
recognize_handw_results = computervision_client.read(remote_image_handw_text_url, raw=True)
operation_location_remote = recognize_handw_results.headers["Operation-Location"]
operation_id = operation_location_remote.split("/")[-1]
while True:
get_handw_text_results = computervision_client.get_read_result(operation_id)
if get_handw_text_results.status not in ['notStarted', 'running']:
break
time.sleep(1)
if get_handw_text_results.status == OperationStatusCodes.succeeded:
for text_result in get_handw_text_results.analyze_result.read_results:
for line in text_result.lines:
print(line.text)
print(line.bounding_box)
print()
===== Batch Read File - remote =====
The quick brown fox jumps
[38.0, 650.0, 2572.0, 699.0, 2570.0, 854.0, 37.0, 815.0]
over
[184.0, 1053.0, 508.0, 1044.0, 510.0, 1123.0, 184.0, 1128.0]
the lazy dog!
[639.0, 1011.0, 1976.0, 1026.0, 1974.0, 1158.0, 637.0, 1141.0]
Thank you for your hard work.
Recommended Posts