Optical Character Recognition (OCR)
The Vision API can detect and extract text from images. Two annotation features support optical character recognition (OCR):

TEXT_DETECTION detects and extracts text from any image. For example, a photograph might contain a street sign or traffic sign. The JSON response includes the entire extracted string, as well as individual words and their bounding boxes.

DOCUMENT_TEXT_DETECTION also extracts text from an image, but the response is optimized for dense text and documents. The JSON response includes page, block, paragraph, word, and break information.
Set up your Google Cloud project and authentication
If you have not created a Google Cloud project, do so now.
Sign in to your Google Cloud account. If you're new to
Google Cloud,
create an account to evaluate how our products perform in
real-world scenarios. New customers also get $300 in free credits to
run, test, and deploy workloads.
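The client library samples in this section authenticate with Application Default Credentials. If you use the gcloud CLI, one way to set this up locally is the following sketch (my-project is a placeholder for your project ID):

# Create Application Default Credentials for the client libraries
gcloud auth application-default login
# Set the project that requests run against (placeholder ID)
gcloud config set project my-project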
Detect text in a local image
You can use the Vision API to perform text detection on a local image file by sending the contents of the image file in the body of your request.
Java

import com.google.cloud.vision.v1.AnnotateImageRequest;
import com.google.cloud.vision.v1.AnnotateImageResponse;
import com.google.cloud.vision.v1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1.EntityAnnotation;
import com.google.cloud.vision.v1.Feature;
import com.google.cloud.vision.v1.Image;
import com.google.cloud.vision.v1.ImageAnnotatorClient;
import com.google.protobuf.ByteString;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class DetectText {

  public static void detectText() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String filePath = "path/to/your/image/file.jpg";
    detectText(filePath);
  }

  // Detects text in the specified image.
  public static void detectText(String filePath) throws IOException {
    List<AnnotateImageRequest> requests = new ArrayList<>();

    ByteString imgBytes = ByteString.readFrom(new FileInputStream(filePath));

    Image img = Image.newBuilder().setContent(imgBytes).build();
    Feature feat = Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION).build();
    AnnotateImageRequest request =
        AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();
    requests.add(request);

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
      BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
      List<AnnotateImageResponse> responses = response.getResponsesList();

      for (AnnotateImageResponse res : responses) {
        if (res.hasError()) {
          System.out.format("Error: %s%n", res.getError().getMessage());
          return;
        }

        // For full list of available annotations, see http://g.co/cloud/vision/docs
        for (EntityAnnotation annotation : res.getTextAnnotationsList()) {
          System.out.format("Text: %s%n", annotation.getDescription());
          System.out.format("Position : %s%n", annotation.getBoundingPoly());
        }
      }
    }
  }
}
Node.js

// Imports the Google Cloud client library
const vision = require('@google-cloud/vision');

// Creates a client
const client = new vision.ImageAnnotatorClient();

/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const fileName = 'Local image file, e.g. /path/to/image.png';

async function detectText() {
  // Performs text detection on the local file
  const [result] = await client.textDetection(fileName);
  const detections = result.textAnnotations;
  console.log('Text:');
  detections.forEach(text => console.log(text));
}

detectText();
Python

def detect_text(path):
    """Detects text in the file."""
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    response = client.text_detection(image=image)
    texts = response.text_annotations
    print('Texts:')

    for text in texts:
        print(f'\n"{text.description}"')

        vertices = ([f'({vertex.x},{vertex.y})'
                     for vertex in text.bounding_poly.vertices])

        print('bounds: {}'.format(','.join(vertices)))

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))
Detect text in a remote image
You can use the Vision API to perform feature detection on a remote image file that is located in Cloud Storage or on the web. To send a remote file request, specify the file's web URL or Cloud Storage URI in the request body.
Java

import com.google.cloud.vision.v1.AnnotateImageRequest;
import com.google.cloud.vision.v1.AnnotateImageResponse;
import com.google.cloud.vision.v1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1.EntityAnnotation;
import com.google.cloud.vision.v1.Feature;
import com.google.cloud.vision.v1.Image;
import com.google.cloud.vision.v1.ImageAnnotatorClient;
import com.google.cloud.vision.v1.ImageSource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class DetectTextGcs {

  public static void detectTextGcs() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String filePath = "gs://your-gcs-bucket/path/to/image/file.jpg";
    detectTextGcs(filePath);
  }

  // Detects text in the specified remote image on Google Cloud Storage.
  public static void detectTextGcs(String gcsPath) throws IOException {
    List<AnnotateImageRequest> requests = new ArrayList<>();

    ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
    Image img = Image.newBuilder().setSource(imgSource).build();
    Feature feat = Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION).build();
    AnnotateImageRequest request =
        AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();
    requests.add(request);

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
      BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
      List<AnnotateImageResponse> responses = response.getResponsesList();

      for (AnnotateImageResponse res : responses) {
        if (res.hasError()) {
          System.out.format("Error: %s%n", res.getError().getMessage());
          return;
        }

        // For full list of available annotations, see http://g.co/cloud/vision/docs
        for (EntityAnnotation annotation : res.getTextAnnotationsList()) {
          System.out.format("Text: %s%n", annotation.getDescription());
          System.out.format("Position : %s%n", annotation.getBoundingPoly());
        }
      }
    }
  }
}
Node.js

// Imports the Google Cloud client libraries
const vision = require('@google-cloud/vision');

// Creates a client
const client = new vision.ImageAnnotatorClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const bucketName = 'Bucket where the file resides, e.g. my-bucket';
// const fileName = 'Path to file within bucket, e.g. path/to/image.png';

async function detectTextGcs() {
  // Performs text detection on the gcs file
  const [result] = await client.textDetection(`gs://${bucketName}/${fileName}`);
  const detections = result.textAnnotations;
  console.log('Text:');
  detections.forEach(text => console.log(text));
}

detectTextGcs();
Python

def detect_text_uri(uri):
    """Detects text in the file located in Google Cloud Storage or on the Web."""
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()
    image = vision.Image()
    image.source.image_uri = uri

    response = client.text_detection(image=image)
    texts = response.text_annotations
    print('Texts:')

    for text in texts:
        print(f'\n"{text.description}"')

        vertices = ([f'({vertex.x},{vertex.y})'
                     for vertex in text.bounding_poly.vertices])

        print('bounds: {}'.format(','.join(vertices)))

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))
Specify the language (optional)
Both types of OCR requests support one or more languageHints that specify the language of any text in the image. However, an empty value usually yields the best results, because omitting a value enables automatic language detection. For languages based on the Latin alphabet, setting languageHints is not needed. In rare cases, when the language of the text in the image is known, setting a hint helps get better results (although it can be a significant hindrance if the hint is wrong). Text detection returns an error if one or more of the specified languages is not one of the supported languages.

If you choose to provide a language hint, modify the body of your request (the request.json file) to provide the string of one of the supported languages in the imageContext.languageHints field.
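For example, a request.json body with a language hint might look like the following sketch (the en hint and the sample image URI are illustrative; substitute a supported language code and your own image):

{
  "requests": [
    {
      "image": {
        "source": {
          "imageUri": "gs://cloud-samples-data/vision/ocr/sign.jpg"
        }
      },
      "features": [
        {
          "type": "TEXT_DETECTION"
        }
      ],
      "imageContext": {
        "languageHints": ["en"]
      }
    }
  ]
}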
You can now specify continent-level data storage and OCR processing. The following regions are currently supported:
us: United States only
eu: The European Union
Locations
Cloud Vision offers you some control over where the resources for your project
are stored and processed. In particular, you can configure
Cloud Vision to store and process your data only in the European Union.
By default, Cloud Vision stores and processes resources in a global location, which means that Cloud Vision doesn't guarantee that your resources will remain within a particular location or region. If you choose the European Union location, Google will store your data and process it only in the European Union.
You and your users can access the data from any location.
Setting the location using the API
The Vision API supports a global API endpoint (vision.googleapis.com) as well as two region-based endpoints: a European Union endpoint (eu-vision.googleapis.com) and a United States endpoint (us-vision.googleapis.com). Use these endpoints for region-specific processing. For example, to store and process your data in the European Union only, use the URI eu-vision.googleapis.com in place of vision.googleapis.com for your REST API calls:
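For example (a sketch of the method URL only; the request body is unchanged):

POST https://eu-vision.googleapis.com/v1/images:annotate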
To store and process your data in the United States only, use the US endpoint
(us-vision.googleapis.com) with the preceding methods.
Setting the location using the client libraries
The Vision API client libraries access the global API endpoint (vision.googleapis.com) by default. To store and process your data in the European Union only, you need to explicitly set the endpoint (eu-vision.googleapis.com). The following code samples show how to configure this setting.
REST
Request
Before using any of the request data, make the following replacements:

REGION_ID: one of the valid regional location identifiers:
us: United States only
eu: The European Union
CLOUD_STORAGE_IMAGE_URI: the path to a valid image file in a Cloud Storage bucket. You must at least have read privileges to the file. Example: gs://cloud-samples-data/vision/ocr/sign.jpg
PROJECT_ID: your Google Cloud project ID.
HTTP method and URL:
POST https://REGION_ID-vision.googleapis.com/v1/projects/PROJECT_ID/locations/REGION_ID/images:annotate
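A representative request JSON body for this call might look like the following sketch (TEXT_DETECTION shown; CLOUD_STORAGE_IMAGE_URI uses the replacement above):

{
  "requests": [
    {
      "image": {
        "source": {
          "imageUri": "CLOUD_STORAGE_IMAGE_URI"
        }
      },
      "features": [
        {
          "type": "TEXT_DETECTION"
        }
      ]
    }
  ]
}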
Java

import com.google.cloud.vision.v1.ImageAnnotatorClient;
import com.google.cloud.vision.v1.ImageAnnotatorSettings;

ImageAnnotatorSettings settings =
    ImageAnnotatorSettings.newBuilder().setEndpoint("eu-vision.googleapis.com:443").build();

// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
ImageAnnotatorClient client = ImageAnnotatorClient.create(settings);
Node.js

// Imports the Google Cloud client library
const vision = require('@google-cloud/vision');

async function setEndpoint() {
  // Specifies the location of the api endpoint
  const clientOptions = {apiEndpoint: 'eu-vision.googleapis.com'};

  // Creates a client
  const client = new vision.ImageAnnotatorClient(clientOptions);

  // Performs text detection on the image file
  const [result] = await client.textDetection('./resources/wakeupcat.jpg');
  const labels = result.textAnnotations;
  console.log('Text:');
  labels.forEach(label => console.log(label.description));
}

setEndpoint();
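The Python client library supports the same endpoint override through its client_options argument. A minimal sketch, assuming the google-cloud-vision package:

Python

from google.cloud import vision

# Route requests to the EU endpoint instead of the global endpoint.
client_options = {'api_endpoint': 'eu-vision.googleapis.com'}
client = vision.ImageAnnotatorClient(client_options=client_options)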
Try it
Try text detection and document text detection. You can use the image specified already (gs://cloud-samples-data/vision/ocr/sign.jpg) by clicking Execute, or you can specify your own image in its place.

To try document text detection, update the value of type to DOCUMENT_TEXT_DETECTION.
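For reference, a document text detection request body for the sample image might look like the following sketch (the same shape as the earlier samples, with only the feature type changed):

{
  "requests": [
    {
      "image": {
        "source": {
          "imageUri": "gs://cloud-samples-data/vision/ocr/sign.jpg"
        }
      },
      "features": [
        {
          "type": "DOCUMENT_TEXT_DETECTION"
        }
      ]
    }
  ]
}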