Document text detection samples

Document Text Detection performs Optical Character Recognition. This feature detects dense document text in an image.

Detecting document text in a local image

Protocol

Refer to the images:annotate API endpoint for complete details.

To perform Document Text Detection, make a POST request and provide the appropriate request body:

POST https://vision.googleapis.com/v1/images:annotate?key=YOUR_API_KEY
{
  "requests": [
    {
      "image": {
        "content": "/9j/7QBEUGhvdG9zaG9...base64-encoded-image-content...fXNWzvDEeYxxxzj/Coa6Bax//Z"
      },
      "features": [
        {
          "type": "DOCUMENT_TEXT_DETECTION"
        }
      ]
    }
  ]
}

See the AnnotateImageRequest reference documentation for more information on configuring the request body.

If the request is successful, the server returns a 200 OK HTTP status code and the response in JSON format:

{
  "responses": [
    {
      "textAnnotations": [
        {
          "locale": "en",
          "description": "O Google Cloud Platform\n",
          "boundingPoly": {
            "vertices": [
              {
                "x": 14, "y": 11
              },
              {
                "x": 279, "y": 11
              },
              {
                "x": 279, "y": 37
              },
              {
                "x": 14, "y": 37
              }
            ]
          }
        }
      ],
      "fullTextAnnotation": {
        "pages": [
          {
            "property": {
              "detectedLanguages": [
                {
                  "languageCode": "en"
                }
              ]
            },
            "width": 281,
            "height": 44,
            "blocks": [
              {
                "property": {
                  "detectedLanguages": [
                    {
                      "languageCode": "en"
                    }
                  ]
                },
                "boundingBox": {
                  "vertices": [
                    {
                      "x": 14, "y": 11
                    },
                    {
                      "x": 279, "y": 11
                    },
                    {
                      "x": 279, "y": 37
                    },
                    {
                      "x": 14, "y": 37
                    }
                  ]
                },
                "paragraphs": [
                  {
                    "property": {
                      "detectedLanguages": [
                        {
                          "languageCode": "en"
                        }
                      ]
                    },
                    "boundingBox": {
                      "vertices": [
                        {
                          "x": 14, "y": 11
                        },
                        {
                          "x": 279, "y": 11
                        },
                        {
                          "x": 279, "y": 37
                        },
                        {
                          "x": 14, "y": 37
                        }
                      ]
                    },
                    "words": [
                      {
                        "property": {
                          "detectedLanguages": [
                            {
                              "languageCode": "en"
                            }
                          ]
                        },
                        "boundingBox": {
                          "vertices": [
                            {
                              "x": 14, "y": 11
                            },
                            {
                              "x": 23, "y": 11
                            },
                            {
                              "x": 23, "y": 37
                            },
                            {
                              "x": 14, "y": 37
                            }
                          ]
                        },
                        "symbols": [
                          {
                            "property": {
                              "detectedLanguages": [
                                {
                                  "languageCode": "en"
                                }
                              ],
                              "detectedBreak": {
                                "type": "SPACE"
                              }
                            },
                            "boundingBox": {
                              "vertices": [
                                {
                                  "x": 14, "y": 11
                                },
                                {
                                  "x": 23, "y": 11
                                },
                                {
                                  "x": 23, "y": 37
                                },
                                {
                                  "x": 14, "y": 37
                                }
                              ]
                            },
                            "text": "O"
                          }
                        ]
                      }
                    ]
                  }
                ],
                "blockType": "TEXT"
              }
            ]
          }
        ],
        "text": "O Google Cloud Platform\n"
      }
    }
  ]
}

C#

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

// Read the image from disk, create a Vision client, and request
// document (dense) text detection for that image.
var localImage = Image.FromFile(filePath);
var annotatorClient = ImageAnnotatorClient.Create();
var documentText = annotatorClient.DetectDocumentText(localImage);

// Walk pages -> blocks -> paragraphs and print each paragraph's words,
// one word per output line.
foreach (var page in documentText.Pages)
{
    foreach (var block in page.Blocks)
    {
        foreach (var paragraph in block.Paragraphs)
        {
            var wordsOnSeparateLines = string.Join("\n", paragraph.Words);
            Console.WriteLine(wordsOnSeparateLines);
        }
    }
}

Go

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

// detectDocumentText gets the full document text from the Vision API for an
// image at the given file path and writes it, quoted, to w.
//
// It returns any error encountered while creating the client, opening or
// reading the file, or calling the API. When the API returns no annotation
// (nil), "No text found." is written instead and nil is returned.
func detectDocumentText(w io.Writer, file string) error {
	ctx := context.Background()

	client, err := vision.NewImageAnnotatorClient(ctx)
	if err != nil {
		return err
	}
	// Release the client's resources once the request has completed.
	defer client.Close()

	f, err := os.Open(file)
	if err != nil {
		return err
	}
	defer f.Close()

	image, err := vision.NewImageFromReader(f)
	if err != nil {
		return err
	}
	annotation, err := client.DetectDocumentText(ctx, image, nil)
	if err != nil {
		return err
	}

	// Guard against a nil annotation so annotation.Text is never read
	// through a nil pointer.
	if annotation == nil {
		fmt.Fprintln(w, "No text found.")
		return nil
	}

	fmt.Fprintln(w, "Text:")
	fmt.Fprintf(w, "%q\n", annotation.Text)

	return nil
}

Java

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

/**
 * Performs document text detection on a local image file.
 *
 * <p>For every paragraph found, prints the paragraph text (its symbols concatenated in order)
 * and its bounding box, then prints the full text of the annotation. If a response carries an
 * error, the error message is printed and the method returns immediately.
 *
 * @param filePath path to the local image file to annotate.
 * @param out stream that receives the printed results.
 * @throws Exception on errors while creating the client or calling the Vision API.
 * @throws IOException if the image file cannot be opened or read.
 */
public static void detectDocumentText(String filePath, PrintStream out) throws Exception,
    IOException {
  ByteString imgBytes;
  // Close the file handle as soon as its bytes have been read.
  try (FileInputStream imageStream = new FileInputStream(filePath)) {
    imgBytes = ByteString.readFrom(imageStream);
  }

  Image img = Image.newBuilder().setContent(imgBytes).build();
  Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
  AnnotateImageRequest request =
      AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();

  List<AnnotateImageRequest> requests = new ArrayList<>();
  requests.add(request);

  // try-with-resources closes the client on exit, so no explicit close() call is needed.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);

    for (AnnotateImageResponse res : response.getResponsesList()) {
      if (res.hasError()) {
        out.printf("Error: %s\n", res.getError().getMessage());
        return;
      }

      // For full list of available annotations, see http://g.co/cloud/vision/docs
      TextAnnotation annotation = res.getFullTextAnnotation();
      for (Page page : annotation.getPagesList()) {
        for (Block block : page.getBlocksList()) {
          for (Paragraph para : block.getParagraphsList()) {
            // Concatenate every symbol of every word; no separators are inserted.
            StringBuilder paraText = new StringBuilder();
            for (Word word : para.getWordsList()) {
              for (Symbol symbol : word.getSymbolsList()) {
                paraText.append(symbol.getText());
              }
            }
            // Output Example using Paragraph:
            out.println("Paragraph: \n" + paraText);
            out.println("Bounds: \n" + para.getBoundingBox() + "\n");
          }
        }
      }
      out.println(annotation.getText());
    }
  }
}

Node.js

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

// Pull in the Google Cloud Vision client library
const Vision = require('@google-cloud/vision');

// Build the Vision API client
const vision = Vision();

// Local image to analyze, e.g. "/path/to/image.png"
// const fileName = 'my-file.jpg';

// Request describing the local file to read as a text document
const request = { source: { filename: fileName } };

// Logs the full detected text taken from the first element of the results
const printDocumentText = (results) => {
  const { fullTextAnnotation } = results[0];
  console.log(fullTextAnnotation.text);
};

// Logs any failure raised by the request or while printing
const reportError = (err) => {
  console.error('ERROR:', err);
};

vision.documentTextDetection(request)
  .then(printDocumentText)
  .catch(reportError);

PHP

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

namespace Google\Cloud\Samples\Vision;

use Google\Cloud\Vision\VisionClient;

// $projectId = 'YOUR_PROJECT_ID';
// $path = 'path/to/your/image.jpg';

/**
 * Runs DOCUMENT_TEXT_DETECTION on a local image and prints the result.
 *
 * Prints the full document text, then for each block its text (symbols
 * concatenated, a space after each word, a newline after each paragraph)
 * and the vertices of its bounding box. Prints "No text found" and returns
 * when the annotation carries no full-text document.
 *
 * @param string $projectId Google Cloud project ID.
 * @param string $path      Path to the local image file.
 */
function detect_document_text($projectId, $path)
{
    $vision = new VisionClient([
        'projectId' => $projectId,
    ]);

    # Annotate the image
    $image = $vision->image(
        file_get_contents($path), ['DOCUMENT_TEXT_DETECTION']);
    $annotation = $vision->annotate($image);

    # fullText() may return null (e.g. nothing detected); avoid calling
    # methods on null below
    $document = $annotation->fullText();
    if ($document === null) {
        printf('No text found' . PHP_EOL);
        return;
    }

    # Print out document text
    $text = $document->text();
    printf('Document text: %s' . PHP_EOL, $text);

    # Print out more detailed and structured information about document text
    foreach ($document->pages() as $page) {
        foreach ($page['blocks'] as $block) {
            $block_text = '';
            foreach ($block['paragraphs'] as $paragraph) {
                foreach ($paragraph['words'] as $word) {
                    foreach ($word['symbols'] as $symbol) {
                        $block_text .= $symbol['text'];
                    }
                    $block_text .= ' ';
                }
                $block_text .= "\n";
            }
            printf('Block text: %s' . PHP_EOL, $block_text);
            printf('Block bounds:' . PHP_EOL);
            foreach ($block['boundingBox']['vertices'] as $vertex) {
                # A coordinate key can be absent from the response (presumably
                # when its value is 0 — TODO confirm); default it to 0 instead
                # of triggering an undefined-index notice
                $x = isset($vertex['x']) ? $vertex['x'] : 0;
                $y = isset($vertex['y']) ? $vertex['y'] : 0;
                printf('X: %s Y: %s' . PHP_EOL, $x, $y);
            }
            printf(PHP_EOL);
        }
    }
}

Python

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

def detect_document(path):
    """Print the text and bounds of each block detected in a local image.

    Reads the file at ``path`` as bytes, sends it for document text
    detection, and for every block on every page prints the block's
    symbols joined together (no separators) followed by the block's
    bounding box.
    """
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = types.Image(content=content)
    response = client.document_text_detection(image=image)

    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            # Flatten paragraphs -> words -> symbols in reading order.
            block_text = ''.join(
                symbol.text
                for paragraph in block.paragraphs
                for word in paragraph.words
                for symbol in word.symbols)

            print('Block Content: {}'.format(block_text))
            print('Block Bounds:\n {}'.format(block.bounding_box))

Ruby

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

# project_id = "Your Google Cloud project ID"
# image_path = "Path to local image file, e.g. './image.png'"

require "google/cloud/vision"

# Create a Vision client for the project and wrap the local image file.
vision = Google::Cloud::Vision.new(project: project_id)
image  = vision.image(image_path)

# Fetch the document annotation for the image and print its text.
document = image.document

puts(document.text)

Detecting document text in a remote image

For your convenience, the Cloud Vision API can perform Document Text Detection directly on an image file located in Google Cloud Storage or on the Web without the need to send the contents of the image file in the body of your request.

Protocol

Refer to the images:annotate API endpoint for complete details.

To perform Document Text Detection, make a POST request and provide the appropriate request body:

POST https://vision.googleapis.com/v1/images:annotate?key=YOUR_API_KEY
{
  "requests": [
    {
      "image": {
        "source": {
          "gcsImageUri": "gs://YOUR_BUCKET_NAME/YOUR_FILE_NAME"
        }
      },
      "features": [
        {
          "type": "DOCUMENT_TEXT_DETECTION"
        }
      ]
    }
  ]
}

See the AnnotateImageRequest reference documentation for more information on configuring the request body.

If the request is successful, the server returns a 200 OK HTTP status code and the response in JSON format:

{
  "responses": [
    {
      "textAnnotations": [
        {
          "locale": "en",
          "description": "O Google Cloud Platform\n",
          "boundingPoly": {
            "vertices": [
              {
                "x": 14, "y": 11
              },
              {
                "x": 279, "y": 11
              },
              {
                "x": 279, "y": 37
              },
              {
                "x": 14, "y": 37
              }
            ]
          }
        }
      ],
      "fullTextAnnotation": {
        "pages": [
          {
            "property": {
              "detectedLanguages": [
                {
                  "languageCode": "en"
                }
              ]
            },
            "width": 281,
            "height": 44,
            "blocks": [
              {
                "property": {
                  "detectedLanguages": [
                    {
                      "languageCode": "en"
                    }
                  ]
                },
                "boundingBox": {
                  "vertices": [
                    {
                      "x": 14, "y": 11
                    },
                    {
                      "x": 279, "y": 11
                    },
                    {
                      "x": 279, "y": 37
                    },
                    {
                      "x": 14, "y": 37
                    }
                  ]
                },
                "paragraphs": [
                  {
                    "property": {
                      "detectedLanguages": [
                        {
                          "languageCode": "en"
                        }
                      ]
                    },
                    "boundingBox": {
                      "vertices": [
                        {
                          "x": 14, "y": 11
                        },
                        {
                          "x": 279, "y": 11
                        },
                        {
                          "x": 279, "y": 37
                        },
                        {
                          "x": 14, "y": 37
                        }
                      ]
                    },
                    "words": [
                      {
                        "property": {
                          "detectedLanguages": [
                            {
                              "languageCode": "en"
                            }
                          ]
                        },
                        "boundingBox": {
                          "vertices": [
                            {
                              "x": 14, "y": 11
                            },
                            {
                              "x": 23, "y": 11
                            },
                            {
                              "x": 23, "y": 37
                            },
                            {
                              "x": 14, "y": 37
                            }
                          ]
                        },
                        "symbols": [
                          {
                            "property": {
                              "detectedLanguages": [
                                {
                                  "languageCode": "en"
                                }
                              ],
                              "detectedBreak": {
                                "type": "SPACE"
                              }
                            },
                            "boundingBox": {
                              "vertices": [
                                {
                                  "x": 14, "y": 11
                                },
                                {
                                  "x": 23, "y": 11
                                },
                                {
                                  "x": 23, "y": 37
                                },
                                {
                                  "x": 14, "y": 37
                                }
                              ]
                            },
                            "text": "O"
                          }
                        ]
                      }
                    ]
                  }
                ],
                "blockType": "TEXT"
              }
            ]
          }
        ],
        "text": "O Google Cloud Platform\n"
      }
    }
  ]
}

C#

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

// Reference the image by URI: either a Google Cloud Storage uri
// or a publicly accessible HTTP or HTTPS uri.
var remoteImage = Image.FromUri(uri);
var annotatorClient = ImageAnnotatorClient.Create();
var documentText = annotatorClient.DetectDocumentText(remoteImage);

// Walk pages -> blocks -> paragraphs and print each paragraph's words,
// one word per output line.
foreach (var page in documentText.Pages)
{
    foreach (var block in page.Blocks)
    {
        foreach (var paragraph in block.Paragraphs)
        {
            var wordsOnSeparateLines = string.Join("\n", paragraph.Words);
            Console.WriteLine(wordsOnSeparateLines);
        }
    }
}

Go

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

// detectDocumentTextURI gets the full document text from the Vision API for
// an image at the given URI and writes it, quoted, to w. Despite its name,
// the file parameter holds the image URI rather than a local path.
//
// It returns any error encountered while creating the client or calling the
// API. When the API returns no annotation (nil), "No text found." is written
// instead and nil is returned.
func detectDocumentTextURI(w io.Writer, file string) error {
	ctx := context.Background()

	client, err := vision.NewImageAnnotatorClient(ctx)
	if err != nil {
		return err
	}
	// Release the client's resources once the request has completed.
	defer client.Close()

	image := vision.NewImageFromURI(file)
	annotation, err := client.DetectDocumentText(ctx, image, nil)
	if err != nil {
		return err
	}

	// Guard against a nil annotation so annotation.Text is never read
	// through a nil pointer.
	if annotation == nil {
		fmt.Fprintln(w, "No text found.")
		return nil
	}

	fmt.Fprintln(w, "Text:")
	fmt.Fprintf(w, "%q\n", annotation.Text)

	return nil
}

Java

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

/**
 * Performs document text detection on an image stored in Google Cloud Storage.
 *
 * <p>For every paragraph found, prints the paragraph text (its symbols concatenated in order)
 * and its bounding box, then prints the full text of the annotation. If a response carries an
 * error, the error message is printed and the method returns immediately.
 *
 * @param gcsPath Google Cloud Storage URI of the image, passed to {@code setGcsImageUri}.
 * @param out stream that receives the printed results.
 * @throws Exception on errors while creating the client or calling the Vision API.
 * @throws IOException declared for callers; kept for signature compatibility.
 */
public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws Exception,
    IOException {
  ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
  Image img = Image.newBuilder().setSource(imgSource).build();
  Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
  AnnotateImageRequest request =
      AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();

  List<AnnotateImageRequest> requests = new ArrayList<>();
  requests.add(request);

  // try-with-resources closes the client on exit, so no explicit close() call is needed.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);

    for (AnnotateImageResponse res : response.getResponsesList()) {
      if (res.hasError()) {
        out.printf("Error: %s\n", res.getError().getMessage());
        return;
      }

      // For full list of available annotations, see http://g.co/cloud/vision/docs
      TextAnnotation annotation = res.getFullTextAnnotation();
      for (Page page : annotation.getPagesList()) {
        for (Block block : page.getBlocksList()) {
          for (Paragraph para : block.getParagraphsList()) {
            // Concatenate every symbol of every word; no separators are inserted.
            StringBuilder paraText = new StringBuilder();
            for (Word word : para.getWordsList()) {
              for (Symbol symbol : word.getSymbolsList()) {
                paraText.append(symbol.getText());
              }
            }
            // Output Example using Paragraph:
            out.println("Paragraph: \n" + paraText);
            out.println("Bounds: \n" + para.getBoundingBox() + "\n");
          }
        }
      }
      out.println(annotation.getText());
    }
  }
}

Node.js

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

// Pull in the Google Cloud Vision client library
const Vision = require('@google-cloud/vision');

// Build the Vision API client
const vision = Vision();

// Bucket that holds the image, e.g. "my-bucket"
// const bucketName = 'my-bucket';

// Object path of the image inside the bucket, e.g. "path/to/image.png"
// const fileName = 'my-file.jpg';

// Cloud Storage URI assembled from the bucket and object names
const gcsPath = `gs://${bucketName}/${fileName}`;

// Request describing the remote image to read as a text document
const request = { source: { imageUri: gcsPath } };

// Logs the full detected text taken from the first element of the results
const printDocumentText = (results) => {
  const { fullTextAnnotation } = results[0];
  console.log(fullTextAnnotation.text);
};

// Logs any failure raised by the request or while printing
const reportError = (err) => {
  console.error('ERROR:', err);
};

vision.documentTextDetection(request)
  .then(printDocumentText)
  .catch(reportError);

PHP

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

namespace Google\Cloud\Samples\Vision;

use Google\Cloud\Vision\VisionClient;
use Google\Cloud\Storage\StorageClient;

// $projectId = 'YOUR_PROJECT_ID';
// $bucketName = 'your-bucket-name';
// $objectName = 'your-object-name';

/**
 * Runs DOCUMENT_TEXT_DETECTION on a Cloud Storage object and prints the result.
 *
 * Prints the full document text, then for each block its text (symbols
 * concatenated, a space after each word, a newline after each paragraph)
 * and the vertices of its bounding box. Prints "No text found" and returns
 * when the annotation carries no full-text document.
 *
 * @param string $projectId  Google Cloud project ID.
 * @param string $bucketName Name of the bucket holding the image.
 * @param string $objectName Name of the image object within the bucket.
 */
function detect_document_text_gcs($projectId, $bucketName, $objectName)
{
    $vision = new VisionClient([
        'projectId' => $projectId,
    ]);
    $storage = new StorageClient([
        'projectId' => $projectId,
    ]);

    # Fetch the storage object and annotate the image
    $object = $storage->bucket($bucketName)->object($objectName);
    $image = $vision->image($object, ['DOCUMENT_TEXT_DETECTION']);
    $annotation = $vision->annotate($image);

    # fullText() may return null (e.g. nothing detected); avoid calling
    # methods on null below
    $document = $annotation->fullText();
    if ($document === null) {
        printf('No text found' . PHP_EOL);
        return;
    }

    # Print out document text
    $text = $document->text();
    printf('Document text: %s' . PHP_EOL, $text);

    # Print out more detailed and structured information about document text
    foreach ($document->pages() as $page) {
        foreach ($page['blocks'] as $block) {
            $block_text = '';
            foreach ($block['paragraphs'] as $paragraph) {
                foreach ($paragraph['words'] as $word) {
                    foreach ($word['symbols'] as $symbol) {
                        $block_text .= $symbol['text'];
                    }
                    $block_text .= ' ';
                }
                $block_text .= "\n";
            }
            printf('Block text: %s' . PHP_EOL, $block_text);
            printf('Block bounds:' . PHP_EOL);
            foreach ($block['boundingBox']['vertices'] as $vertex) {
                # A coordinate key can be absent from the response (presumably
                # when its value is 0 — TODO confirm); default it to 0 instead
                # of triggering an undefined-index notice
                $x = isset($vertex['x']) ? $vertex['x'] : 0;
                $y = isset($vertex['y']) ? $vertex['y'] : 0;
                printf('X: %s Y: %s' . PHP_EOL, $x, $y);
            }
            printf(PHP_EOL);
        }
    }
}

Python

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

def detect_document_uri(uri):
    """Print the text and bounds of each block detected in a remote image.

    Points the request at the image identified by ``uri``, runs document
    text detection, and for every block on every page prints the block's
    symbols joined together (no separators) followed by the block's
    bounding box.
    """
    client = vision.ImageAnnotatorClient()

    # Reference the image by URI rather than uploading its bytes.
    image = types.Image()
    image.source.image_uri = uri

    response = client.document_text_detection(image=image)

    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            # Flatten paragraphs -> words -> symbols in reading order.
            block_text = ''.join(
                symbol.text
                for paragraph in block.paragraphs
                for word in paragraph.words
                for symbol in word.symbols)

            print('Block Content: {}'.format(block_text))
            print('Block Bounds:\n {}'.format(block.bounding_box))

Ruby

For more on installing and creating a Cloud Vision API client, refer to Cloud Vision API Client Libraries.

# project_id = "Your Google Cloud project ID"
# image_path = "Google Cloud Storage URI, e.g. 'gs://my-bucket/image.png'"

require "google/cloud/vision"

# Create a Vision client for the project and reference the remote image.
vision = Google::Cloud::Vision.new(project: project_id)
image  = vision.image(image_path)

# Fetch the document annotation for the image and print its text.
document = image.document

puts(document.text)

Send feedback about...

Google Cloud Vision API Documentation