Cloud Storage に保存されている PDF ファイルに対して光学式文字認識(OCR)を実行します。
もっと見る
このコードサンプルを含む詳細なドキュメントについては、以下をご覧ください。
コードサンプル
Go
このサンプルを試す前に、Vision クイックスタート: クライアント ライブラリの使用にある Go の設定手順を完了してください。詳細については、Vision Go API のリファレンス ドキュメントをご覧ください。
Vision に対する認証を行うには、アプリケーションのデフォルト認証情報を設定します。詳細については、ローカル開発環境の認証を設定するをご覧ください。
// detectAsyncDocumentURI performs Optical Character Recognition (OCR) on a
// PDF file stored in GCS.
//
// gcsSourceURI is the gs:// URI of the input file and gcsDestinationURI is the
// gs:// URI prefix under which the JSON results are written. Progress and the
// final operation response are written to w. Any error from creating the
// client, starting the operation, or waiting for it is returned unchanged.
func detectAsyncDocumentURI(w io.Writer, gcsSourceURI, gcsDestinationURI string) error {
	ctx := context.Background()

	client, err := vision.NewImageAnnotatorClient(ctx)
	if err != nil {
		return err
	}
	// Release the underlying connection once the function returns; without
	// this every call leaks a client.
	defer client.Close()

	request := &visionpb.AsyncBatchAnnotateFilesRequest{
		Requests: []*visionpb.AsyncAnnotateFileRequest{
			{
				Features: []*visionpb.Feature{
					{
						Type: visionpb.Feature_DOCUMENT_TEXT_DETECTION,
					},
				},
				InputConfig: &visionpb.InputConfig{
					GcsSource: &visionpb.GcsSource{Uri: gcsSourceURI},
					// Supported MimeTypes are: "application/pdf" and "image/tiff".
					MimeType: "application/pdf",
				},
				OutputConfig: &visionpb.OutputConfig{
					GcsDestination: &visionpb.GcsDestination{Uri: gcsDestinationURI},
					// How many pages should be grouped into each json output file.
					BatchSize: 2,
				},
			},
		},
	}

	operation, err := client.AsyncBatchAnnotateFiles(ctx, request)
	if err != nil {
		return err
	}

	fmt.Fprintf(w, "Waiting for the operation to finish.")

	// Wait blocks until the long-running operation completes or ctx is done.
	resp, err := operation.Wait(ctx)
	if err != nil {
		return err
	}

	fmt.Fprintf(w, "%v", resp)

	return nil
}
Java
このサンプルを試す前に、Vision クイックスタート: クライアント ライブラリの使用にある Java の設定手順を完了してください。詳細については、Vision Java API のリファレンス ドキュメントをご覧ください。
Vision に対する認証を行うには、アプリケーションのデフォルト認証情報を設定します。詳細については、ローカル開発環境の認証を設定するをご覧ください。
/**
 * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage, then lists
 * the generated output files and prints the text of the first page found in the first one.
 *
 * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
 *     text on.
 * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the
 *     results on.
 * @throws Exception if the request fails, the operation does not finish within the timeout, or
 *     the output cannot be read or parsed.
 */
public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath)
    throws Exception {
  // Initialize client that will be used to send requests. This client only needs to be created
  // once, and can be reused for multiple requests. The try-with-resources statement closes it
  // and cleans up any remaining background resources.
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    List<AsyncAnnotateFileRequest> requests = new ArrayList<>();

    // Set the GCS source path for the remote file.
    GcsSource gcsSource = GcsSource.newBuilder().setUri(gcsSourcePath).build();

    // Create the input configuration with the MIME type of the source file.
    // Supported MimeTypes: "application/pdf", "image/tiff"
    InputConfig inputConfig =
        InputConfig.newBuilder().setMimeType("application/pdf").setGcsSource(gcsSource).build();

    // Set the GCS destination path for where to save the results.
    GcsDestination gcsDestination =
        GcsDestination.newBuilder().setUri(gcsDestinationPath).build();

    // Create the configuration for the output with the batch size.
    // The batch size sets how many pages should be grouped into each json output file.
    OutputConfig outputConfig =
        OutputConfig.newBuilder().setBatchSize(2).setGcsDestination(gcsDestination).build();

    // Select the Feature required by the vision API
    Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();

    // Build the OCR request
    AsyncAnnotateFileRequest request =
        AsyncAnnotateFileRequest.newBuilder()
            .addFeatures(feature)
            .setInputConfig(inputConfig)
            .setOutputConfig(outputConfig)
            .build();
    requests.add(request);

    // Perform the OCR request
    OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
        client.asyncBatchAnnotateFilesAsync(requests);

    System.out.println("Waiting for the operation to finish.");

    // Wait for the request to finish. The returned value is intentionally discarded, since the
    // API saves the result to the specified location on GCS.
    response.get(180, TimeUnit.SECONDS);

    // Get the destination location from the gcsDestinationPath
    Matcher matcher = Pattern.compile("gs://([^/]+)/(.+)").matcher(gcsDestinationPath);
    if (!matcher.find()) {
      System.out.println("No MATCH");
      return;
    }
    String bucketName = matcher.group(1);
    String prefix = matcher.group(2);

    // Once the request has completed and the output has been
    // written to GCS, we can list all the output files.
    Storage storage = StorageOptions.getDefaultInstance().getService();

    // Storage.get returns null (rather than throwing) when the bucket does not exist.
    Bucket bucket = storage.get(bucketName);
    if (bucket == null) {
      throw new IllegalStateException("Bucket not found: " + bucketName);
    }

    // Get the list of objects with the given prefix from the GCS bucket
    com.google.api.gax.paging.Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));
    Blob firstOutputFile = null;

    // List objects with the given prefix.
    System.out.println("Output files:");
    for (Blob blob : pageList.iterateAll()) {
      System.out.println(blob.getName());

      // Remember the first real output file from GCS, skipping folder placeholder objects
      // (names ending in "/"), which carry no JSON content.
      // Since we specified batch size = 2, the first response contains
      // the first two pages of the input file.
      if (firstOutputFile == null && !blob.getName().endsWith("/")) {
        firstOutputFile = blob;
      }
    }

    if (firstOutputFile == null) {
      // Nothing was written under the prefix; there is nothing to parse.
      System.out.println("No output files found under " + gcsDestinationPath);
      return;
    }

    // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
    // object. If the Blob is small read all its content in one request
    // (Note: the file is a .json file)
    // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
    // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
    String jsonContents =
        new String(firstOutputFile.getContent(), java.nio.charset.StandardCharsets.UTF_8);
    AnnotateFileResponse.Builder builder = AnnotateFileResponse.newBuilder();
    JsonFormat.parser().merge(jsonContents, builder);

    // Build the AnnotateFileResponse object
    AnnotateFileResponse annotateFileResponse = builder.build();
    if (annotateFileResponse.getResponsesCount() == 0) {
      System.out.println("No page responses found in " + firstOutputFile.getName());
      return;
    }

    // Parse through the object to get the actual response for the first page of the input file.
    AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);

    // Here we print the full text from the first page.
    // The response contains more information:
    // annotation/pages/blocks/paragraphs/words/symbols
    // including confidence score and bounding boxes
    System.out.format("%nText: %s%n", annotateImageResponse.getFullTextAnnotation().getText());
  }
}
Node.js
このサンプルを試す前に、Vision クイックスタート: クライアント ライブラリの使用にある Node.js の設定手順を完了してください。詳細については、Vision Node.js API のリファレンス ドキュメントをご覧ください。
Vision に対する認証を行うには、アプリケーションのデフォルト認証情報を設定します。詳細については、ローカル開発環境の認証を設定するをご覧ください。
// Load the v1 surface of the Google Cloud Vision client library
const vision = require('@google-cloud/vision').v1;

// Instantiate the image annotator client
const client = new vision.ImageAnnotatorClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// Bucket where the file resides
// const bucketName = 'my-bucket';
// Path to PDF file within bucket
// const fileName = 'path/to/document.pdf';
// The folder to store the results
// const outputPrefix = 'results'

// Source document and destination folder, both expressed as gs:// URIs
const gcsSourceUri = `gs://${bucketName}/${fileName}`;
const gcsDestinationUri = `gs://${bucketName}/${outputPrefix}/`;

// A single file-annotation request: where to read from, what to detect,
// and where to write the JSON results.
const request = {
  requests: [
    {
      inputConfig: {
        // Supported mime_types are: 'application/pdf' and 'image/tiff'
        mimeType: 'application/pdf',
        gcsSource: {uri: gcsSourceUri},
      },
      features: [{type: 'DOCUMENT_TEXT_DETECTION'}],
      outputConfig: {
        gcsDestination: {uri: gcsDestinationUri},
      },
    },
  ],
};

// Start the long-running operation, then wait for it to complete
const [operation] = await client.asyncBatchAnnotateFiles(request);
const [filesResponse] = await operation.promise();

// Report the destination recorded in the first file response
const firstFileResponse = filesResponse.responses[0];
const destinationUri = firstFileResponse.outputConfig.gcsDestination.uri;
console.log('Json saved to: ' + destinationUri);
PHP
このサンプルを試す前に、Vision クイックスタート: クライアント ライブラリの使用にある PHP の設定手順を完了してください。詳細については、Vision PHP API のリファレンス ドキュメントをご覧ください。
Vision に対する認証を行うには、アプリケーションのデフォルト認証情報を設定します。詳細については、ローカル開発環境の認証を設定するをご覧ください。
namespace Google\Cloud\Samples\Vision;
use Google\Cloud\Storage\StorageClient;
use Google\Cloud\Vision\V1\AnnotateFileResponse;
use Google\Cloud\Vision\V1\AsyncAnnotateFileRequest;
use Google\Cloud\Vision\V1\Feature;
use Google\Cloud\Vision\V1\Feature\Type;
use Google\Cloud\Vision\V1\GcsDestination;
use Google\Cloud\Vision\V1\GcsSource;
use Google\Cloud\Vision\V1\ImageAnnotatorClient;
use Google\Cloud\Vision\V1\InputConfig;
use Google\Cloud\Vision\V1\OutputConfig;
/**
 * Runs asynchronous document text detection (OCR) on a file stored in Cloud Storage,
 * lists the output files written under the destination, and prints the text found in
 * the first output file.
 *
 * @param string $path GCS path to the document, e.g. "gs://path/to/your/document.pdf"
 * @param string $output GCS path to store the results, e.g. "gs://path/to/store/results/"
 *
 * @throws \InvalidArgumentException if $output is not a gs:// URI.
 */
function detect_pdf_gcs(string $path, string $output)
{
    # split the destination into bucket and object prefix up front, so a malformed
    # destination fails before the long-running operation is started
    if (!preg_match('/^gs:\/\/([a-zA-Z0-9\._\-]+)\/?(\S+)?$/', $output, $match)) {
        throw new \InvalidArgumentException('Invalid GCS destination: ' . $output);
    }
    $bucketName = $match[1];
    $prefix = isset($match[2]) ? $match[2] : '';

    # select ocr feature
    $feature = (new Feature())
        ->setType(Type::DOCUMENT_TEXT_DETECTION);

    # set $path (file to OCR) as source
    $gcsSource = (new GcsSource())
        ->setUri($path);
    # supported mime_types are: 'application/pdf' and 'image/tiff'
    $mimeType = 'application/pdf';
    $inputConfig = (new InputConfig())
        ->setGcsSource($gcsSource)
        ->setMimeType($mimeType);

    # set $output as destination
    $gcsDestination = (new GcsDestination())
        ->setUri($output);
    # how many pages should be grouped into each json output file.
    $batchSize = 2;
    $outputConfig = (new OutputConfig())
        ->setGcsDestination($gcsDestination)
        ->setBatchSize($batchSize);

    # prepare request using configs set above
    $request = (new AsyncAnnotateFileRequest())
        ->setFeatures([$feature])
        ->setInputConfig($inputConfig)
        ->setOutputConfig($outputConfig);
    $requests = [$request];

    # make request
    $imageAnnotator = new ImageAnnotatorClient();
    try {
        $operation = $imageAnnotator->asyncBatchAnnotateFiles($requests);
        print('Waiting for operation to finish.' . PHP_EOL);
        $operation->pollUntilComplete();

        # once the request has completed and the output has been
        # written to GCS, we can list all the output files.
        $storage = new StorageClient();
        $bucket = $storage->bucket($bucketName);
        $options = ['prefix' => $prefix];
        $objects = $bucket->objects($options);

        # list objects with the given prefix, remembering the first real output
        # file on the way. (Advancing the iterator with next() before reading
        # current() would skip the first object.)
        $firstObject = null;
        print('Output files:' . PHP_EOL);
        foreach ($objects as $object) {
            print($object->name() . PHP_EOL);
            # folder placeholder objects end in "/" and hold no JSON
            if ($firstObject === null && substr($object->name(), -1) !== '/') {
                $firstObject = $object;
            }
        }

        if ($firstObject === null) {
            print('No output files found.' . PHP_EOL);
            return;
        }

        # process the first output file from GCS.
        # since we specified batch_size=2, the first response contains
        # the first two pages of the input file.
        $jsonString = $firstObject->downloadAsString();
        $firstBatch = new AnnotateFileResponse();
        $firstBatch->mergeFromJsonString($jsonString);

        # get annotation and print text
        foreach ($firstBatch->getResponses() as $response) {
            $annotation = $response->getFullTextAnnotation();
            # the annotation is unset when a page has no detected text
            if ($annotation !== null) {
                print($annotation->getText());
            }
        }
    } finally {
        # always release the client, including on errors and early returns
        $imageAnnotator->close();
    }
}
Python
このサンプルを試す前に、Vision クイックスタート: クライアント ライブラリの使用にある Python の設定手順を完了してください。詳細については、Vision Python API のリファレンス ドキュメントをご覧ください。
Vision に対する認証を行うには、アプリケーションのデフォルト認証情報を設定します。詳細については、ローカル開発環境の認証を設定するをご覧ください。
def async_detect_document(gcs_source_uri, gcs_destination_uri):
    """OCR with PDF/TIFF as source files on GCS.

    Starts an asynchronous document text detection operation, waits for it
    to finish, lists the JSON output files written under the destination and
    prints the full text of the first page found in the first output file.

    Args:
        gcs_source_uri: gs:// URI of the PDF file to process.
        gcs_destination_uri: gs:// URI (bucket plus optional object prefix)
            under which the JSON results are written.

    Raises:
        ValueError: if gcs_destination_uri is not a gs:// URI.
    """
    import json
    import re

    # Split the destination into bucket and prefix before doing any remote
    # work, so a malformed URI fails immediately instead of after the
    # long-running operation has already completed.
    match = re.fullmatch(r"gs://([^/]+)(?:/(.*))?", gcs_destination_uri)
    if match is None:
        raise ValueError(
            f"Invalid GCS destination URI: {gcs_destination_uri!r} "
            "(expected gs://bucket/prefix)"
        )
    bucket_name = match.group(1)
    # A bare bucket ("gs://bucket" or "gs://bucket/") has an empty prefix.
    prefix = match.group(2) or ""

    from google.cloud import vision
    from google.cloud import storage

    # Supported mime_types are: 'application/pdf' and 'image/tiff'
    mime_type = "application/pdf"

    # How many pages should be grouped into each json output file.
    batch_size = 2

    client = vision.ImageAnnotatorClient()

    feature = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)

    gcs_source = vision.GcsSource(uri=gcs_source_uri)
    input_config = vision.InputConfig(gcs_source=gcs_source, mime_type=mime_type)

    gcs_destination = vision.GcsDestination(uri=gcs_destination_uri)
    output_config = vision.OutputConfig(
        gcs_destination=gcs_destination, batch_size=batch_size
    )

    async_request = vision.AsyncAnnotateFileRequest(
        features=[feature], input_config=input_config, output_config=output_config
    )

    operation = client.async_batch_annotate_files(requests=[async_request])

    print("Waiting for the operation to finish.")
    operation.result(timeout=420)

    # Once the request has completed and the output has been
    # written to GCS, we can list all the output files.
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # List objects with the given prefix, filtering out folders.
    blob_list = [
        blob
        for blob in bucket.list_blobs(prefix=prefix)
        if not blob.name.endswith("/")
    ]
    print("Output files:")
    for blob in blob_list:
        print(blob.name)

    if not blob_list:
        # Nothing was written under the prefix; there is nothing to parse.
        print("No output files found.")
        return

    # Process the first output file from GCS.
    # Since we specified batch_size=2, the first response contains
    # the first two pages of the input file.
    output = blob_list[0]

    json_string = output.download_as_bytes().decode("utf-8")
    response = json.loads(json_string)

    page_responses = response.get("responses", [])
    if not page_responses:
        print("No page responses found in the first output file.")
        return

    # The actual response for the first page of the input file.
    first_page_response = page_responses[0]
    # "fullTextAnnotation" is absent when no text was detected on the page.
    annotation = first_page_response.get("fullTextAnnotation", {})

    # Here we print the full text from the first page.
    # The response contains more information:
    # annotation/pages/blocks/paragraphs/words/symbols
    # including confidence scores and bounding boxes
    print("Full text:\n")
    print(annotation.get("text", ""))
次のステップ
他の Google Cloud プロダクトに関連するコードサンプルを検索したり、フィルタで絞り込んだりするには、Google Cloud のサンプルをご覧ください。