Shows how to upload an image file to Cloud Storage, then use the Vision API and the Translation API to extract and translate the text in the image.
Code samples
Go
package ocr

import (
	"context"
	"encoding/json"
	"fmt"
	"log"

	"cloud.google.com/go/pubsub"
	"golang.org/x/text/language"
	visionpb "google.golang.org/genproto/googleapis/cloud/vision/v1"
)

// detectText detects the text in an image using the Google Vision API.
// It uses package-level clients and settings that the full sample
// initializes elsewhere (see the sketch after this listing).
func detectText(ctx context.Context, bucketName, fileName string) error {
	log.Printf("Looking for text in image %v", fileName)
	maxResults := 1
	image := &visionpb.Image{
		Source: &visionpb.ImageSource{
			GcsImageUri: fmt.Sprintf("gs://%s/%s", bucketName, fileName),
		},
	}
	annotations, err := visionClient.DetectTexts(ctx, image, &visionpb.ImageContext{}, maxResults)
	if err != nil {
		return fmt.Errorf("DetectTexts: %v", err)
	}
	text := ""
	if len(annotations) > 0 {
		text = annotations[0].Description
	}
	if len(annotations) == 0 || len(text) == 0 {
		log.Printf("No text detected in image %q. Returning early.", fileName)
		return nil
	}
	log.Printf("Extracted text %q from image (%d chars).", text, len(text))

	detectResponse, err := translateClient.DetectLanguage(ctx, []string{text})
	if err != nil {
		return fmt.Errorf("DetectLanguage: %v", err)
	}
	if len(detectResponse) == 0 || len(detectResponse[0]) == 0 {
		return fmt.Errorf("DetectLanguage gave empty response")
	}
	srcLang := detectResponse[0][0].Language.String()
	log.Printf("Detected language %q for text %q.", srcLang, text)

	// Submit a message to the bus for each target language
	for _, targetLang := range toLang {
		topicName := translateTopic
		if srcLang == targetLang || srcLang == "und" { // detection returns "und" for undefined language
			topicName = resultTopic
		}
		targetTag, err := language.Parse(targetLang)
		if err != nil {
			return fmt.Errorf("language.Parse: %v", err)
		}
		srcTag, err := language.Parse(srcLang)
		if err != nil {
			return fmt.Errorf("language.Parse: %v", err)
		}
		message, err := json.Marshal(ocrMessage{
			Text:     text,
			FileName: fileName,
			Lang:     targetTag,
			SrcLang:  srcTag,
		})
		if err != nil {
			return fmt.Errorf("json.Marshal: %v", err)
		}
		topic := pubsubClient.Topic(topicName)
		ok, err := topic.Exists(ctx)
		if err != nil {
			return fmt.Errorf("Exists: %v", err)
		}
		if !ok {
			topic, err = pubsubClient.CreateTopic(ctx, topicName)
			if err != nil {
				return fmt.Errorf("CreateTopic: %v", err)
			}
		}
		msg := &pubsub.Message{
			Data: []byte(message),
		}
		if _, err = topic.Publish(ctx, msg).Get(ctx); err != nil {
			return fmt.Errorf("Get: %v", err)
		}
	}
	return nil
}
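The Go function above depends on package-level clients, settings, and the ocrMessage type, all of which the full tutorial defines in its own setup code. The following is a minimal sketch of what those declarations could look like, not the tutorial's exact code: the variable names and ocrMessage fields are taken from the function above, the environment variable names follow the Node.js and Python samples below, and the JSON tags, the GCP_PROJECT variable, and the setup helper itself are assumptions.

// Sketch of the package-level state assumed by detectText above.
package ocr

import (
	"context"
	"log"
	"os"
	"strings"

	"cloud.google.com/go/pubsub"
	"cloud.google.com/go/translate"
	vision "cloud.google.com/go/vision/apiv1"
	"golang.org/x/text/language"
)

// ocrMessage is the payload published to Pub/Sub; the field names come from
// the json.Marshal call in detectText. The JSON tags here are an assumption.
type ocrMessage struct {
	Text     string       `json:"text"`
	FileName string       `json:"fileName"`
	Lang     language.Tag `json:"lang"`
	SrcLang  language.Tag `json:"srcLang"`
}

var (
	visionClient    *vision.ImageAnnotatorClient
	translateClient *translate.Client
	pubsubClient    *pubsub.Client

	toLang         []string // target languages to translate into
	translateTopic string   // topic for texts that still need translation
	resultTopic    string   // topic for texts that need no translation
)

// setup initializes the clients and configuration. This is a hypothetical
// helper, shown only so the sketch is self-contained.
func setup(ctx context.Context) {
	translateTopic = os.Getenv("TRANSLATE_TOPIC")
	resultTopic = os.Getenv("RESULT_TOPIC")
	toLang = strings.Split(os.Getenv("TO_LANG"), ",")

	var err error
	if visionClient, err = vision.NewImageAnnotatorClient(ctx); err != nil {
		log.Fatalf("vision.NewImageAnnotatorClient: %v", err)
	}
	if translateClient, err = translate.NewClient(ctx); err != nil {
		log.Fatalf("translate.NewClient: %v", err)
	}
	if pubsubClient, err = pubsub.NewClient(ctx, os.Getenv("GCP_PROJECT")); err != nil {
		log.Fatalf("pubsub.NewClient: %v", err)
	}
}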
Java
// Relies on members defined elsewhere in the tutorial class: logger,
// publisher, LOCATION_NAME, TO_LANGS, and the OcrTranslateApiMessage helper.
private void detectText(String bucket, String filename) {
  logger.info("Looking for text in image " + filename);

  List<AnnotateImageRequest> visionRequests = new ArrayList<>();
  String gcsPath = String.format("gs://%s/%s", bucket, filename);

  ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
  Image img = Image.newBuilder().setSource(imgSource).build();
  Feature textFeature = Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION).build();
  AnnotateImageRequest visionRequest =
      AnnotateImageRequest.newBuilder().addFeatures(textFeature).setImage(img).build();
  visionRequests.add(visionRequest);

  // Detect text in an image using the Cloud Vision API
  AnnotateImageResponse visionResponse;
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    visionResponse = client.batchAnnotateImages(visionRequests).getResponses(0);
    if (visionResponse == null || !visionResponse.hasFullTextAnnotation()) {
      logger.info(String.format("Image %s contains no text", filename));
      return;
    }

    if (visionResponse.hasError()) {
      // Log error
      logger.log(
          Level.SEVERE, "Error in vision API call: " + visionResponse.getError().getMessage());
      return;
    }
  } catch (IOException e) {
    // Log error (since IOException cannot be thrown by a Cloud Function)
    logger.log(Level.SEVERE, "Error detecting text: " + e.getMessage(), e);
    return;
  }

  String text = visionResponse.getFullTextAnnotation().getText();
  logger.info("Extracted text from image: " + text);

  // Detect language using the Cloud Translation API
  DetectLanguageRequest languageRequest =
      DetectLanguageRequest.newBuilder()
          .setParent(LOCATION_NAME)
          .setMimeType("text/plain")
          .setContent(text)
          .build();
  DetectLanguageResponse languageResponse;
  try (TranslationServiceClient client = TranslationServiceClient.create()) {
    languageResponse = client.detectLanguage(languageRequest);
  } catch (IOException e) {
    // Log error (since IOException cannot be thrown by a function)
    logger.log(Level.SEVERE, "Error detecting language: " + e.getMessage(), e);
    return;
  }

  if (languageResponse.getLanguagesCount() == 0) {
    logger.info("No languages were detected for text: " + text);
    return;
  }

  String languageCode = languageResponse.getLanguages(0).getLanguageCode();
  logger.info(String.format("Detected language %s for file %s", languageCode, filename));

  // Send a Pub/Sub translation request for every language we're going to translate to
  for (String targetLanguage : TO_LANGS) {
    logger.info("Sending translation request for language " + targetLanguage);
    OcrTranslateApiMessage message = new OcrTranslateApiMessage(text, filename, targetLanguage);
    ByteString byteStr = ByteString.copyFrom(message.toPubsubData());
    PubsubMessage pubsubApiMessage = PubsubMessage.newBuilder().setData(byteStr).build();
    try {
      publisher.publish(pubsubApiMessage).get();
    } catch (InterruptedException | ExecutionException e) {
      // Log error
      logger.log(Level.SEVERE, "Error publishing translation request: " + e.getMessage(), e);
      return;
    }
  }
}
Node.js
/**
 * Detects the text in an image using the Google Vision API.
 *
 * Assumes `vision` and `translate` API clients and the `publishResult`
 * helper are defined elsewhere in the sample.
 *
 * @param {string} bucketName Cloud Storage bucket name.
 * @param {string} filename Cloud Storage file name.
 * @returns {Promise}
 */
const detectText = async (bucketName, filename) => {
  console.log(`Looking for text in image ${filename}`);
  const [textDetections] = await vision.textDetection(
    `gs://${bucketName}/${filename}`
  );
  const [annotation] = textDetections.textAnnotations;
  const text = annotation ? annotation.description.trim() : '';
  console.log('Extracted text from image:', text);

  let [translateDetection] = await translate.detect(text);
  if (Array.isArray(translateDetection)) {
    [translateDetection] = translateDetection;
  }
  console.log(
    `Detected language "${translateDetection.language}" for ${filename}`
  );

  // Submit a message to the bus for each language we're going to translate to
  const TO_LANGS = process.env.TO_LANG.split(',');
  const topicName = process.env.TRANSLATE_TOPIC;

  const tasks = TO_LANGS.map(lang => {
    const messageData = {
      text: text,
      filename: filename,
      lang: lang,
    };

    // Helper function that publishes translation result to a Pub/Sub topic
    // For more information on publishing Pub/Sub messages, see this page:
    // https://cloud.google.com/pubsub/docs/publisher
    return publishResult(topicName, messageData);
  });

  return Promise.all(tasks);
};
Python
def detect_text(bucket, filename):
    # Relies on module-level objects defined elsewhere in the sample:
    # vision_client, translate_client, publisher, and project_id.
    print("Looking for text in image {}".format(filename))

    futures = []

    image = vision.Image(
        source=vision.ImageSource(gcs_image_uri=f"gs://{bucket}/{filename}")
    )
    text_detection_response = vision_client.text_detection(image=image)
    annotations = text_detection_response.text_annotations

    if len(annotations) > 0:
        text = annotations[0].description
    else:
        text = ""
    print("Extracted text {} from image ({} chars).".format(text, len(text)))

    detect_language_response = translate_client.detect_language(text)
    src_lang = detect_language_response["language"]
    print("Detected language {} for text {}.".format(src_lang, text))

    # Submit a message to the bus for each target language
    to_langs = os.environ["TO_LANG"].split(",")
    for target_lang in to_langs:
        topic_name = os.environ["TRANSLATE_TOPIC"]
        # Detection returns "und" for an undefined language
        if src_lang == target_lang or src_lang == "und":
            topic_name = os.environ["RESULT_TOPIC"]
        message = {
            "text": text,
            "filename": filename,
            "lang": target_lang,
            "src_lang": src_lang,
        }
        message_data = json.dumps(message).encode("utf-8")
        topic_path = publisher.topic_path(project_id, topic_name)
        future = publisher.publish(topic_path, data=message_data)
        futures.append(future)

    for future in futures:
        future.result()
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud samples browser.