Sends an online processing request to a Document OCR processor and parses the response. Extracts and prints the full text, the page dimensions, the detected languages, and the paragraphs, blocks, lines, and tokens.
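Each sample below follows the same core flow: build a ProcessRequest that names the processor and attaches the raw file bytes, send it with the synchronous processDocument call, and read the returned Document. A minimal sketch of that flow in Python (placeholder IDs; assumes an existing OCR processor and Application Default Credentials):

from google.api_core.client_options import ClientOptions
from google.cloud import documentai

# Placeholder values; replace them with your own project, location, and processor.
client = documentai.DocumentProcessorServiceClient(
    client_options=ClientOptions(api_endpoint="us-documentai.googleapis.com")
)
name = client.processor_path("my-project-id", "us", "my-processor-id")

with open("file.pdf", "rb") as f:
    raw_document = documentai.RawDocument(
        content=f.read(), mime_type="application/pdf"
    )

# Online (synchronous) processing returns the parsed Document inline.
result = client.process_document(
    request=documentai.ProcessRequest(name=name, raw_document=raw_document)
)
print(result.document.text)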
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
Java
For more information, see the Document AI Java API reference documentation.
To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
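For local development, setting up Application Default Credentials typically amounts to running the standard gcloud command below; the client libraries in these samples then pick up the credentials automatically.

gcloud auth application-default login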
import com.google.cloud.documentai.v1beta3.Document;
import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient;
import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceSettings;
import com.google.cloud.documentai.v1beta3.ProcessRequest;
import com.google.cloud.documentai.v1beta3.ProcessResponse;
import com.google.cloud.documentai.v1beta3.RawDocument;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
public class ProcessOcrDocument {
public static void processOcrDocument()
throws IOException, InterruptedException, ExecutionException, TimeoutException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "your-project-id";
String location = "your-project-location"; // Format is "us" or "eu".
String processorId = "your-processor-id";
String filePath = "path/to/input/file.pdf";
processOcrDocument(projectId, location, processorId, filePath);
}
public static void processOcrDocument(
String projectId, String location, String processorId, String filePath)
throws IOException, InterruptedException, ExecutionException, TimeoutException {
// Initialize the client that will be used to send requests. This client only
// needs to be created once, and can be reused for multiple requests. After
// completing all of your requests, call the "close" method on the client to
// safely clean up any remaining background resources.
String endpoint = String.format("%s-documentai.googleapis.com:443", location);
DocumentProcessorServiceSettings settings =
DocumentProcessorServiceSettings.newBuilder().setEndpoint(endpoint).build();
try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create(settings)) {
// The full resource name of the processor, e.g.:
// projects/{project-id}/locations/{location}/processors/{processor-id}
// You must create new processors in the Cloud Console first
String name =
String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId);
// Read the file.
byte[] imageFileData = Files.readAllBytes(Paths.get(filePath));
// Wrap the raw file bytes in a ByteString.
ByteString content = ByteString.copyFrom(imageFileData);
RawDocument document =
RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build();
// Configure the process request.
ProcessRequest request =
ProcessRequest.newBuilder().setName(name).setRawDocument(document).build();
// Recognizes text entities in the PDF document
ProcessResponse result = client.processDocument(request);
Document documentResponse = result.getDocument();
System.out.println("Document processing complete.");
// Read the text recognition output from the processor
// For a full list of Document object attributes,
// please reference this page:
// https://googleapis.dev/java/google-cloud-document-ai/latest/index.html
// Get all of the document text as one big string
String text = documentResponse.getText();
System.out.printf("Full document text: '%s'\n", escapeNewlines(text));
// Read the text recognition output from the processor
List<Document.Page> pages = documentResponse.getPagesList();
System.out.printf("There are %s page(s) in this document.\n", pages.size());
for (Document.Page page : pages) {
System.out.printf("Page %d:\n", page.getPageNumber());
printPageDimensions(page.getDimension());
printDetectedLanguages(page.getDetectedLanguagesList());
printParagraphs(page.getParagraphsList(), text);
printBlocks(page.getBlocksList(), text);
printLines(page.getLinesList(), text);
printTokens(page.getTokensList(), text);
}
}
}
private static void printPageDimensions(Document.Page.Dimension dimension) {
String unit = dimension.getUnit();
System.out.printf(" Width: %.1f %s\n", dimension.getWidth(), unit);
System.out.printf(" Height: %.1f %s\n", dimension.getHeight(), unit);
}
private static void printDetectedLanguages(
List<Document.Page.DetectedLanguage> detectedLanguages) {
System.out.println(" Detected languages:");
for (Document.Page.DetectedLanguage detectedLanguage : detectedLanguages) {
String languageCode = detectedLanguage.getLanguageCode();
float confidence = detectedLanguage.getConfidence();
System.out.printf(" %s (%.2f%%)\n", languageCode, confidence * 100.0);
}
}
private static void printParagraphs(List<Document.Page.Paragraph> paragraphs, String text) {
System.out.printf(" %d paragraphs detected:\n", paragraphs.size());
Document.Page.Paragraph firstParagraph = paragraphs.get(0);
String firstParagraphText = getLayoutText(firstParagraph.getLayout().getTextAnchor(), text);
System.out.printf(" First paragraph text: %s\n", escapeNewlines(firstParagraphText));
Document.Page.Paragraph lastParagraph = paragraphs.get(paragraphs.size() - 1);
String lastParagraphText = getLayoutText(lastParagraph.getLayout().getTextAnchor(), text);
System.out.printf(" Last paragraph text: %s\n", escapeNewlines(lastParagraphText));
}
private static void printBlocks(List<Document.Page.Block> blocks, String text) {
System.out.printf(" %d blocks detected:\n", blocks.size());
Document.Page.Block firstBlock = blocks.get(0);
String firstBlockText = getLayoutText(firstBlock.getLayout().getTextAnchor(), text);
System.out.printf(" First block text: %s\n", escapeNewlines(firstBlockText));
Document.Page.Block lastBlock = blocks.get(blocks.size() - 1);
String lastBlockText = getLayoutText(lastBlock.getLayout().getTextAnchor(), text);
System.out.printf(" Last block text: %s\n", escapeNewlines(lastBlockText));
}
private static void printLines(List<Document.Page.Line> lines, String text) {
System.out.printf(" %d lines detected:\n", lines.size());
Document.Page.Line firstLine = lines.get(0);
String firstLineText = getLayoutText(firstLine.getLayout().getTextAnchor(), text);
System.out.printf(" First line text: %s\n", escapeNewlines(firstLineText));
Document.Page.Line lastLine = lines.get(lines.size() - 1);
String lastLineText = getLayoutText(lastLine.getLayout().getTextAnchor(), text);
System.out.printf(" Last line text: %s\n", escapeNewlines(lastLineText));
}
private static void printTokens(List<Document.Page.Token> tokens, String text) {
System.out.printf(" %d tokens detected:\n", tokens.size());
Document.Page.Token firstToken = tokens.get(0);
String firstTokenText = getLayoutText(firstToken.getLayout().getTextAnchor(), text);
System.out.printf(" First token text: %s\n", escapeNewlines(firstTokenText));
Document.Page.Token lastToken = tokens.get(tokens.size() - 1);
String lastTokenText = getLayoutText(lastToken.getLayout().getTextAnchor(), text);
System.out.printf(" Last token text: %s\n", escapeNewlines(lastTokenText));
}
// Extract shards from the text field
private static String getLayoutText(Document.TextAnchor textAnchor, String text) {
if (textAnchor.getTextSegmentsList().size() > 0) {
int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex();
int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex();
return text.substring(startIdx, endIdx);
}
return "[NO TEXT]";
}
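// Note: getLayoutText above reads only the first text segment. A layout that
// spans multiple segments can be reconstructed by concatenating all of them,
// as in this hypothetical variant (not part of the original sample):
//
// private static String getAllLayoutText(Document.TextAnchor textAnchor, String text) {
//   StringBuilder sb = new StringBuilder();
//   for (Document.TextAnchor.TextSegment segment : textAnchor.getTextSegmentsList()) {
//     sb.append(text, (int) segment.getStartIndex(), (int) segment.getEndIndex());
//   }
//   return sb.toString();
// }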
private static String escapeNewlines(String s) {
return s.replace("\n", "\\n").replace("\r", "\\r");
}
}
Node.js
For more information, see the Document AI Node.js API reference documentation.
To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
/**
* TODO(developer): Uncomment these variables before running the sample.
*/
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu'
// const processorId = 'YOUR_PROCESSOR_ID'; // Create processor in Cloud Console
// const filePath = '/path/to/local/pdf';
const {DocumentProcessorServiceClient} =
require('@google-cloud/documentai').v1beta3;
// Instantiates a client
const client = new DocumentProcessorServiceClient();
async function processDocument() {
// The full resource name of the processor, e.g.:
// projects/{project-id}/locations/{location}/processors/{processor-id}
// You must create new processors in the Cloud Console first
const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;
// Read the file into memory.
const fs = require('fs').promises;
const imageFile = await fs.readFile(filePath);
// Convert the image data to a Buffer and base64 encode it.
const encodedImage = Buffer.from(imageFile).toString('base64');
const request = {
name,
rawDocument: {
content: encodedImage,
mimeType: 'application/pdf',
},
};
// Recognizes text entities in the PDF document
const [result] = await client.processDocument(request);
console.log('Document processing complete.');
// Read the text recognition output from the processor
// For a full list of Document object attributes,
// please reference this page: https://googleapis.dev/nodejs/documentai/latest/index.html
const {document} = result;
const {text} = document;
// Read the text recognition output from the processor
console.log(`Full document text: ${JSON.stringify(text)}`);
console.log(`There are ${document.pages.length} page(s) in this document.`);
for (const page of document.pages) {
console.log(`Page ${page.pageNumber}`);
printPageDimensions(page.dimension);
printDetectedLanguages(page.detectedLanguages);
printParagraphs(page.paragraphs, text);
printBlocks(page.blocks, text);
printLines(page.lines, text);
printTokens(page.tokens, text);
}
}
const printPageDimensions = dimension => {
console.log(` Width: ${dimension.width}`);
console.log(` Height: ${dimension.height}`);
};
const printDetectedLanguages = detectedLanguages => {
console.log(' Detected languages:');
for (const lang of detectedLanguages) {
const code = lang.languageCode;
const confPercent = lang.confidence * 100;
console.log(` ${code} (${confPercent.toFixed(2)}% confidence)`);
}
};
const printParagraphs = (paragraphs, text) => {
console.log(` ${paragraphs.length} paragraphs detected:`);
const firstParagraphText = getText(paragraphs[0].layout.textAnchor, text);
console.log(
` First paragraph text: ${JSON.stringify(firstParagraphText)}`
);
const lastParagraphText = getText(
paragraphs[paragraphs.length - 1].layout.textAnchor,
text
);
console.log(
` Last paragraph text: ${JSON.stringify(lastParagraphText)}`
);
};
const printBlocks = (blocks, text) => {
console.log(` ${blocks.length} blocks detected:`);
const firstBlockText = getText(blocks[0].layout.textAnchor, text);
console.log(` First block text: ${JSON.stringify(firstBlockText)}`);
const lastBlockText = getText(
blocks[blocks.length - 1].layout.textAnchor,
text
);
console.log(` Last block text: ${JSON.stringify(lastBlockText)}`);
};
const printLines = (lines, text) => {
console.log(` ${lines.length} lines detected:`);
const firstLineText = getText(lines[0].layout.textAnchor, text);
console.log(` First line text: ${JSON.stringify(firstLineText)}`);
const lastLineText = getText(
lines[lines.length - 1].layout.textAnchor,
text
);
console.log(` Last line text: ${JSON.stringify(lastLineText)}`);
};
const printTokens = (tokens, text) => {
console.log(` ${tokens.length} tokens detected:`);
const firstTokenText = getText(tokens[0].layout.textAnchor, text);
console.log(` First token text: ${JSON.stringify(firstTokenText)}`);
const firstTokenBreakType = tokens[0].detectedBreak.type;
console.log(` First token break type: ${firstTokenBreakType}`);
const lastTokenText = getText(
tokens[tokens.length - 1].layout.textAnchor,
text
);
console.log(` Last token text: ${JSON.stringify(lastTokenText)}`);
const lastTokenBreakType = tokens[tokens.length - 1].detectedBreak.type;
console.log(` Last token break type: ${lastTokenBreakType}`);
};
// Extract shards from the text field
const getText = (textAnchor, text) => {
if (!textAnchor.textSegments || textAnchor.textSegments.length === 0) {
return '';
}
// First shard in document doesn't have startIndex property
const startIndex = textAnchor.textSegments[0].startIndex || 0;
const endIndex = textAnchor.textSegments[0].endIndex;
return text.substring(startIndex, endIndex);
};
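// Run the sample. This invocation is a sketch: it assumes the TODO variables
// at the top have been uncommented and set to real values.
processDocument().catch(err => {
  console.error(err.message);
  process.exitCode = 1;
});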
Python
For more information, see the Document AI Python API reference documentation.
To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
from typing import Optional, Sequence
from google.api_core.client_options import ClientOptions
from google.cloud import documentai
# TODO(developer): Uncomment these variables before running the sample.
# project_id = "YOUR_PROJECT_ID"
# location = "YOUR_PROCESSOR_LOCATION" # Format is "us" or "eu"
# processor_id = "YOUR_PROCESSOR_ID" # Create processor before running sample
# processor_version = "rc" # Refer to https://cloud.google.com/document-ai/docs/manage-processor-versions for more information
# file_path = "/path/to/local/pdf"
# mime_type = "application/pdf" # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
def process_document_ocr_sample(
project_id: str,
location: str,
processor_id: str,
processor_version: str,
file_path: str,
mime_type: str,
) -> None:
# Optional: Additional configurations for Document OCR Processor.
# For more information: https://cloud.google.com/document-ai/docs/enterprise-document-ocr
process_options = documentai.ProcessOptions(
ocr_config=documentai.OcrConfig(
enable_native_pdf_parsing=True,
enable_image_quality_scores=True,
enable_symbol=True,
# OCR Add Ons https://cloud.google.com/document-ai/docs/ocr-add-ons
premium_features=documentai.OcrConfig.PremiumFeatures(
compute_style_info=True,
enable_math_ocr=False, # Enable to use Math OCR Model
enable_selection_mark_detection=True,
),
)
)
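    # Note (assumption based on the helper docstrings further below): style
    # info and selection-mark/math detection are only returned by processor
    # versions that support them, e.g. `pretrained-ocr-v2.0-2023-06-02`.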
# Online processing request to Document AI
document = process_document(
project_id,
location,
processor_id,
processor_version,
file_path,
mime_type,
process_options=process_options,
)
text = document.text
print(f"Full document text: {text}\n")
print(f"There are {len(document.pages)} page(s) in this document.\n")
for page in document.pages:
print(f"Page {page.page_number}:")
print_page_dimensions(page.dimension)
print_detected_languages(page.detected_languages)
print_blocks(page.blocks, text)
print_paragraphs(page.paragraphs, text)
print_lines(page.lines, text)
print_tokens(page.tokens, text)
if page.symbols:
print_symbols(page.symbols, text)
if page.image_quality_scores:
print_image_quality_scores(page.image_quality_scores)
if page.visual_elements:
print_visual_elements(page.visual_elements, text)
def print_page_dimensions(dimension: documentai.Document.Page.Dimension) -> None:
print(f" Width: {str(dimension.width)}")
print(f" Height: {str(dimension.height)}")
def print_detected_languages(
detected_languages: Sequence[documentai.Document.Page.DetectedLanguage],
) -> None:
print(" Detected languages:")
for lang in detected_languages:
print(f" {lang.language_code} ({lang.confidence:.1%} confidence)")
def print_blocks(blocks: Sequence[documentai.Document.Page.Block], text: str) -> None:
print(f" {len(blocks)} blocks detected:")
first_block_text = layout_to_text(blocks[0].layout, text)
print(f" First text block: {repr(first_block_text)}")
last_block_text = layout_to_text(blocks[-1].layout, text)
print(f" Last text block: {repr(last_block_text)}")
def print_paragraphs(
paragraphs: Sequence[documentai.Document.Page.Paragraph], text: str
) -> None:
print(f" {len(paragraphs)} paragraphs detected:")
first_paragraph_text = layout_to_text(paragraphs[0].layout, text)
print(f" First paragraph text: {repr(first_paragraph_text)}")
last_paragraph_text = layout_to_text(paragraphs[-1].layout, text)
print(f" Last paragraph text: {repr(last_paragraph_text)}")
def print_lines(lines: Sequence[documentai.Document.Page.Line], text: str) -> None:
print(f" {len(lines)} lines detected:")
first_line_text = layout_to_text(lines[0].layout, text)
print(f" First line text: {repr(first_line_text)}")
last_line_text = layout_to_text(lines[-1].layout, text)
print(f" Last line text: {repr(last_line_text)}")
def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) -> None:
print(f" {len(tokens)} tokens detected:")
first_token_text = layout_to_text(tokens[0].layout, text)
first_token_break_type = tokens[0].detected_break.type_.name
print(f" First token text: {repr(first_token_text)}")
print(f" First token break type: {repr(first_token_break_type)}")
if tokens[0].style_info:
print_style_info(tokens[0].style_info)
last_token_text = layout_to_text(tokens[-1].layout, text)
last_token_break_type = tokens[-1].detected_break.type_.name
print(f" Last token text: {repr(last_token_text)}")
print(f" Last token break type: {repr(last_token_break_type)}")
if tokens[-1].style_info:
print_style_info(tokens[-1].style_info)
def print_symbols(
symbols: Sequence[documentai.Document.Page.Symbol], text: str
) -> None:
print(f" {len(symbols)} symbols detected:")
first_symbol_text = layout_to_text(symbols[0].layout, text)
print(f" First symbol text: {repr(first_symbol_text)}")
last_symbol_text = layout_to_text(symbols[-1].layout, text)
print(f" Last symbol text: {repr(last_symbol_text)}")
def print_image_quality_scores(
image_quality_scores: documentai.Document.Page.ImageQualityScores,
) -> None:
print(f" Quality score: {image_quality_scores.quality_score:.1%}")
print(" Detected defects:")
for detected_defect in image_quality_scores.detected_defects:
print(f" {detected_defect.type_}: {detected_defect.confidence:.1%}")
def print_style_info(style_info: documentai.Document.Page.Token.StyleInfo) -> None:
"""
Only supported in version `pretrained-ocr-v2.0-2023-06-02`
"""
print(f" Font Size: {style_info.font_size}pt")
print(f" Font Type: {style_info.font_type}")
print(f" Bold: {style_info.bold}")
print(f" Italic: {style_info.italic}")
print(f" Underlined: {style_info.underlined}")
print(f" Handwritten: {style_info.handwritten}")
print(
f" Text Color (RGBa): {style_info.text_color.red}, {style_info.text_color.green}, {style_info.text_color.blue}, {style_info.text_color.alpha}"
)
def print_visual_elements(
visual_elements: Sequence[documentai.Document.Page.VisualElement], text: str
) -> None:
"""
Only supported in version `pretrained-ocr-v2.0-2023-06-02`
"""
checkboxes = [x for x in visual_elements if "checkbox" in x.type]
math_symbols = [x for x in visual_elements if x.type == "math_formula"]
if checkboxes:
print(f" {len(checkboxes)} checkboxes detected:")
print(f" First checkbox: {repr(checkboxes[0].type)}")
print(f" Last checkbox: {repr(checkboxes[-1].type)}")
if math_symbols:
print(f" {len(math_symbols)} math symbols detected:")
first_math_symbol_text = layout_to_text(math_symbols[0].layout, text)
print(f" First math symbol: {repr(first_math_symbol_text)}")
def process_document(
project_id: str,
location: str,
processor_id: str,
processor_version: str,
file_path: str,
mime_type: str,
process_options: Optional[documentai.ProcessOptions] = None,
) -> documentai.Document:
# You must set the `api_endpoint` if you use a location other than "us".
client = documentai.DocumentProcessorServiceClient(
client_options=ClientOptions(
api_endpoint=f"{location}-documentai.googleapis.com"
)
)
# The full resource name of the processor version, e.g.:
# `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
# You must create a processor before running this sample.
name = client.processor_version_path(
project_id, location, processor_id, processor_version
)
# Read the file into memory
with open(file_path, "rb") as image:
image_content = image.read()
# Configure the process request
request = documentai.ProcessRequest(
name=name,
raw_document=documentai.RawDocument(content=image_content, mime_type=mime_type),
# Only supported for Document OCR processor
process_options=process_options,
)
result = client.process_document(request=request)
# For a full list of `Document` object attributes, reference this page:
# https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
return result.document
def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
"""
Document AI identifies text in different parts of the document by their
offsets in the entirety of the document's text. This function converts
offsets to a string.
"""
# If the text spans several lines, it will be stored
# in multiple text segments.
return "".join(
text[int(segment.start_index) : int(segment.end_index)]
for segment in layout.text_anchor.text_segments
)
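As a usage sketch, the sample can be invoked with values like those in the TODO block at the top (all values hypothetical):

process_document_ocr_sample(
    project_id="my-project-id",
    location="us",
    processor_id="my-processor-id",
    processor_version="rc",
    file_path="/path/to/local/file.pdf",
    mime_type="application/pdf",
)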
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.