Detect text in a local file

Stay organized with collections Save and categorize content based on your preferences.

Perform text detection on a local file.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample


Before trying this sample, follow the Go setup instructions in the Vision quickstart using client libraries. For more information, see the Vision Go API reference documentation.

// detectDocumentText gets the full document text from the Vision API for an image at the given file path.
func detectDocumentText(w io.Writer, file string) error {
	ctx := context.Background()

	client, err := vision.NewImageAnnotatorClient(ctx)
	if err != nil {
		return err

	f, err := os.Open(file)
	if err != nil {
		return err
	defer f.Close()

	image, err := vision.NewImageFromReader(f)
	if err != nil {
		return err
	annotation, err := client.DetectDocumentText(ctx, image, nil)
	if err != nil {
		return err

	if annotation == nil {
		fmt.Fprintln(w, "No text found.")
	} else {
		fmt.Fprintln(w, "Document Text:")
		fmt.Fprintf(w, "%q\n", annotation.Text)

		fmt.Fprintln(w, "Pages:")
		for _, page := range annotation.Pages {
			fmt.Fprintf(w, "\tConfidence: %f, Width: %d, Height: %d\n", page.Confidence, page.Width, page.Height)
			fmt.Fprintln(w, "\tBlocks:")
			for _, block := range page.Blocks {
				fmt.Fprintf(w, "\t\tConfidence: %f, Block type: %v\n", block.Confidence, block.BlockType)
				fmt.Fprintln(w, "\t\tParagraphs:")
				for _, paragraph := range block.Paragraphs {
					fmt.Fprintf(w, "\t\t\tConfidence: %f", paragraph.Confidence)
					fmt.Fprintln(w, "\t\t\tWords:")
					for _, word := range paragraph.Words {
						symbols := make([]string, len(word.Symbols))
						for i, s := range word.Symbols {
							symbols[i] = s.Text
						wordText := strings.Join(symbols, "")
						fmt.Fprintf(w, "\t\t\t\tConfidence: %f, Symbols: %s\n", word.Confidence, wordText)

	return nil


Before trying this sample, follow the Java setup instructions in the Vision quickstart using client libraries. For more information, see the Vision Java API reference documentation.

public static void detectDocumentText(String filePath) throws IOException {
  List<AnnotateImageRequest> requests = new ArrayList<>();

  ByteString imgBytes = ByteString.readFrom(new FileInputStream(filePath));

  Image img = Image.newBuilder().setContent(imgBytes).build();
  Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
  AnnotateImageRequest request =

  // Initialize client that will be used to send requests. This client only needs to be created
  // once, and can be reused for multiple requests. After completing all of your requests, call
  // the "close" method on the client to safely clean up any remaining background resources.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
    List<AnnotateImageResponse> responses = response.getResponsesList();

    for (AnnotateImageResponse res : responses) {
      if (res.hasError()) {
        System.out.format("Error: %s%n", res.getError().getMessage());

      // For full list of available annotations, see
      TextAnnotation annotation = res.getFullTextAnnotation();
      for (Page page : annotation.getPagesList()) {
        String pageText = "";
        for (Block block : page.getBlocksList()) {
          String blockText = "";
          for (Paragraph para : block.getParagraphsList()) {
            String paraText = "";
            for (Word word : para.getWordsList()) {
              String wordText = "";
              for (Symbol symbol : word.getSymbolsList()) {
                wordText = wordText + symbol.getText();
                    "Symbol text: %s (confidence: %f)%n",
                    symbol.getText(), symbol.getConfidence());
                  "Word text: %s (confidence: %f)%n%n", wordText, word.getConfidence());
              paraText = String.format("%s %s", paraText, wordText);
            // Output Example using Paragraph:
            System.out.println("%nParagraph: %n" + paraText);
            System.out.format("Paragraph Confidence: %f%n", para.getConfidence());
            blockText = blockText + paraText;
          pageText = pageText + blockText;
      System.out.println("%nComplete annotation:");


Before trying this sample, follow the Node.js setup instructions in the Vision quickstart using client libraries. For more information, see the Vision Node.js API reference documentation.

// Imports the Google Cloud client library
const vision = require('@google-cloud/vision');

// Creates a client
const client = new vision.ImageAnnotatorClient();

 * TODO(developer): Uncomment the following line before running the sample.
// const fileName = 'Local image file, e.g. /path/to/image.png';

// Read a local image as a text document
const [result] = await client.documentTextDetection(fileName);
const fullTextAnnotation = result.fullTextAnnotation;
console.log(`Full text: ${fullTextAnnotation.text}`);
fullTextAnnotation.pages.forEach(page => {
  page.blocks.forEach(block => {
    console.log(`Block confidence: ${block.confidence}`);
    block.paragraphs.forEach(paragraph => {
      console.log(`Paragraph confidence: ${paragraph.confidence}`);
      paragraph.words.forEach(word => {
        const wordText = => s.text).join('');
        console.log(`Word text: ${wordText}`);
        console.log(`Word confidence: ${word.confidence}`);
        word.symbols.forEach(symbol => {
          console.log(`Symbol text: ${symbol.text}`);
          console.log(`Symbol confidence: ${symbol.confidence}`);


Before trying this sample, follow the PHP setup instructions in the Vision quickstart using client libraries. For more information, see the Vision PHP API reference documentation.

namespace Google\Cloud\Samples\Vision;

use Google\Cloud\Vision\V1\ImageAnnotatorClient;

 * @param string $path Path to the image, e.g. "path/to/your/image.jpg"
function detect_document_text(string $path)
    $imageAnnotator = new ImageAnnotatorClient();

    # annotate the image
    $image = file_get_contents($path);
    $response = $imageAnnotator->documentTextDetection($image);
    $annotation = $response->getFullTextAnnotation();

    # print out detailed and structured information about document text
    if ($annotation) {
        foreach ($annotation->getPages() as $page) {
            foreach ($page->getBlocks() as $block) {
                $block_text = '';
                foreach ($block->getParagraphs() as $paragraph) {
                    foreach ($paragraph->getWords() as $word) {
                        foreach ($word->getSymbols() as $symbol) {
                            $block_text .= $symbol->getText();
                        $block_text .= ' ';
                    $block_text .= "\n";
                printf('Block content: %s', $block_text);
                printf('Block confidence: %f' . PHP_EOL,

                # get bounds
                $vertices = $block->getBoundingBox()->getVertices();
                $bounds = [];
                foreach ($vertices as $vertex) {
                    $bounds[] = sprintf('(%d,%d)', $vertex->getX(),
                print('Bounds: ' . join(', ', $bounds) . PHP_EOL);
    } else {
        print('No text found' . PHP_EOL);



Before trying this sample, follow the Python setup instructions in the Vision quickstart using client libraries. For more information, see the Vision Python API reference documentation.

def detect_document(path):
    """Detects document features in an image."""
    from import vision
    import io
    client = vision.ImageAnnotatorClient()

    with, 'rb') as image_file:
        content =

    image = vision.Image(content=content)

    response = client.document_text_detection(image=image)

    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            print('\nBlock confidence: {}\n'.format(block.confidence))

            for paragraph in block.paragraphs:
                print('Paragraph confidence: {}'.format(

                for word in paragraph.words:
                    word_text = ''.join([
                        symbol.text for symbol in word.symbols
                    print('Word text: {} (confidence: {})'.format(
                        word_text, word.confidence))

                    for symbol in word.symbols:
                        print('\tSymbol: {} (confidence: {})'.format(
                            symbol.text, symbol.confidence))

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.