Zeigt, wie Sie Bilddateien in Cloud Storage hochladen und Text mithilfe der Vision API und Translation API aus den Bildern extrahieren und übersetzen.
Weitere Informationen
Eine ausführliche Dokumentation, die dieses Codebeispiel enthält, finden Sie in der zugehörigen Anleitung.
Codebeispiel
Go
package ocr
import (
"context"
"encoding/json"
"fmt"
"log"
"cloud.google.com/go/pubsub"
"golang.org/x/text/language"
visionpb "google.golang.org/genproto/googleapis/cloud/vision/v1"
)
// detectText detects the text in an image using the Google Vision API.
// detectText extracts the text in a Cloud Storage image using the Vision
// API, detects the text's language with the Translation API, and publishes
// one Pub/Sub message per configured target language so downstream
// functions can translate (or directly store) the result.
//
// It returns nil without publishing when the image contains no text.
func detectText(ctx context.Context, bucketName, fileName string) error {
	log.Printf("Looking for text in image %v", fileName)
	maxResults := 1
	image := &visionpb.Image{
		Source: &visionpb.ImageSource{
			GcsImageUri: fmt.Sprintf("gs://%s/%s", bucketName, fileName),
		},
	}
	annotations, err := visionClient.DetectTexts(ctx, image, &visionpb.ImageContext{}, maxResults)
	if err != nil {
		return fmt.Errorf("DetectTexts: %w", err)
	}
	// The first annotation (if any) carries the full detected text.
	text := ""
	if len(annotations) > 0 {
		text = annotations[0].Description
	}
	if text == "" {
		log.Printf("No text detected in image %q. Returning early.", fileName)
		return nil
	}
	log.Printf("Extracted text %q from image (%d chars).", text, len(text))

	detectResponse, err := translateClient.DetectLanguage(ctx, []string{text})
	if err != nil {
		return fmt.Errorf("DetectLanguage: %w", err)
	}
	if len(detectResponse) == 0 || len(detectResponse[0]) == 0 {
		return fmt.Errorf("DetectLanguage gave empty response")
	}
	srcLang := detectResponse[0][0].Language.String()
	log.Printf("Detected language %q for text %q.", srcLang, text)

	// The source language is the same for every target, so parse it once
	// instead of on every loop iteration.
	srcTag, err := language.Parse(srcLang)
	if err != nil {
		return fmt.Errorf("language.Parse: %w", err)
	}

	// Submit a message to the bus for each target language.
	for _, targetLang := range toLang {
		// When the text is already in the target language, or detection
		// returned "und" (undefined), skip translation and publish
		// straight to the result topic.
		topicName := translateTopic
		if srcLang == targetLang || srcLang == "und" {
			topicName = resultTopic
		}
		targetTag, err := language.Parse(targetLang)
		if err != nil {
			return fmt.Errorf("language.Parse: %w", err)
		}
		message, err := json.Marshal(ocrMessage{
			Text:     text,
			FileName: fileName,
			Lang:     targetTag,
			SrcLang:  srcTag,
		})
		if err != nil {
			return fmt.Errorf("json.Marshal: %w", err)
		}
		topic := pubsubClient.Topic(topicName)
		ok, err := topic.Exists(ctx)
		if err != nil {
			return fmt.Errorf("Exists: %w", err)
		}
		if !ok {
			// Lazily create the topic on first use.
			topic, err = pubsubClient.CreateTopic(ctx, topicName)
			if err != nil {
				return fmt.Errorf("CreateTopic: %w", err)
			}
		}
		msg := &pubsub.Message{
			Data: message,
		}
		// Publish synchronously: Get blocks until the server acks.
		if _, err = topic.Publish(ctx, msg).Get(ctx); err != nil {
			return fmt.Errorf("Get: %w", err)
		}
	}
	return nil
}
Java
/**
 * Extracts text from a Cloud Storage image with the Vision API, detects the
 * text's language with the Translation API, and publishes one Pub/Sub
 * translation request per configured target language.
 *
 * <p>All failures are logged rather than thrown, since a background Cloud
 * Function cannot propagate checked exceptions to its caller.
 *
 * @param bucket name of the Cloud Storage bucket containing the image
 * @param filename name of the image file within the bucket
 */
private void detectText(String bucket, String filename) {
logger.info("Looking for text in image " + filename);
List<AnnotateImageRequest> visionRequests = new ArrayList<>();
String gcsPath = String.format("gs://%s/%s", bucket, filename);
ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
Image img = Image.newBuilder().setSource(imgSource).build();
// Request full TEXT_DETECTION on the single image.
Feature textFeature = Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION).build();
AnnotateImageRequest visionRequest =
AnnotateImageRequest.newBuilder().addFeatures(textFeature).setImage(img).build();
visionRequests.add(visionRequest);
// Detect text in an image using the Cloud Vision API
AnnotateImageResponse visionResponse;
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
// Only one request was submitted, so only response 0 is relevant.
visionResponse = client.batchAnnotateImages(visionRequests).getResponses(0);
// NOTE(review): this "no text" check runs before the hasError() check
// below, so a response that carries an error but no annotation is logged
// as "contains no text" rather than as an API error — confirm intended.
if (visionResponse == null || !visionResponse.hasFullTextAnnotation()) {
logger.info(String.format("Image %s contains no text", filename));
return;
}
if (visionResponse.hasError()) {
// Log error
logger.log(
Level.SEVERE, "Error in vision API call: " + visionResponse.getError().getMessage());
return;
}
} catch (IOException e) {
// Log error (since IOException cannot be thrown by a Cloud Function)
logger.log(Level.SEVERE, "Error detecting text: " + e.getMessage(), e);
return;
}
String text = visionResponse.getFullTextAnnotation().getText();
logger.info("Extracted text from image: " + text);
// Detect language using the Cloud Translation API
DetectLanguageRequest languageRequest =
DetectLanguageRequest.newBuilder()
.setParent(LOCATION_NAME)
.setMimeType("text/plain")
.setContent(text)
.build();
DetectLanguageResponse languageResponse;
try (TranslationServiceClient client = TranslationServiceClient.create()) {
languageResponse = client.detectLanguage(languageRequest);
} catch (IOException e) {
// Log error (since IOException cannot be thrown by a function)
logger.log(Level.SEVERE, "Error detecting language: " + e.getMessage(), e);
return;
}
if (languageResponse.getLanguagesCount() == 0) {
logger.info("No languages were detected for text: " + text);
return;
}
// Languages are returned ordered by confidence; take the most likely one.
String languageCode = languageResponse.getLanguages(0).getLanguageCode();
logger.info(String.format("Detected language %s for file %s", languageCode, filename));
// Send a Pub/Sub translation request for every language we're going to translate to
for (String targetLanguage : TO_LANGS) {
logger.info("Sending translation request for language " + targetLanguage);
OcrTranslateApiMessage message = new OcrTranslateApiMessage(text, filename, targetLanguage);
ByteString byteStr = ByteString.copyFrom(message.toPubsubData());
PubsubMessage pubsubApiMessage = PubsubMessage.newBuilder().setData(byteStr).build();
try {
// Block until the publish is acknowledged before moving on.
publisher.publish(pubsubApiMessage).get();
} catch (InterruptedException | ExecutionException e) {
// Log error
logger.log(Level.SEVERE, "Error publishing translation request: " + e.getMessage(), e);
return;
}
}
}
Node.js
/**
* Detects the text in an image using the Google Vision API.
*
* @param {string} bucketName Cloud Storage bucket name.
* @param {string} filename Cloud Storage file name.
* @returns {Promise}
*/
/**
 * Detects the text in an image using the Google Vision API, detects its
 * language with the Translation API, and publishes one Pub/Sub translation
 * request per target language listed in the TO_LANG environment variable.
 *
 * @param {string} bucketName Cloud Storage bucket name.
 * @param {string} filename Cloud Storage file name.
 * @returns {Promise} Resolves when every publish request has completed.
 */
const detectText = async (bucketName, filename) => {
  console.log(`Looking for text in image ${filename}`);
  const [textDetections] = await vision.textDetection(
    `gs://${bucketName}/${filename}`
  );
  // The first annotation (if any) contains the full detected text.
  const [annotation] = textDetections.textAnnotations;
  const text = annotation ? annotation.description.trim() : '';
  console.log('Extracted text from image:', text);
  // detect() may return either a single detection or an array of them;
  // normalize to a single detection object.
  let [translateDetection] = await translate.detect(text);
  if (Array.isArray(translateDetection)) {
    [translateDetection] = translateDetection;
  }
  console.log(
    `Detected language "${translateDetection.language}" for ${filename}`
  );
  // Submit a message to the bus for each language we're going to translate to
  const TO_LANGS = process.env.TO_LANG.split(',');
  const topicName = process.env.TRANSLATE_TOPIC;
  const tasks = TO_LANGS.map(lang => {
    const messageData = {
      text: text,
      filename: filename,
      lang: lang,
    };
    // Helper function that publishes translation result to a Pub/Sub topic
    // For more information on publishing Pub/Sub messages, see this page:
    // https://cloud.google.com/pubsub/docs/publisher
    return publishResult(topicName, messageData);
  });
  return Promise.all(tasks);
};
Python
def detect_text(bucket, filename):
    """Extract text from a Cloud Storage image and queue translation requests.

    Uses the Vision API to extract text from the image, the Translation API
    to detect the text's language, and publishes one Pub/Sub message per
    target language configured in the TO_LANG environment variable.

    Args:
        bucket: Name of the Cloud Storage bucket containing the image.
        filename: Name of the image file within the bucket.
    """
    print("Looking for text in image {}".format(filename))
    futures = []
    image = vision.Image(
        source=vision.ImageSource(gcs_image_uri=f"gs://{bucket}/{filename}")
    )
    text_detection_response = vision_client.text_detection(image=image)
    annotations = text_detection_response.text_annotations
    # The first annotation (if any) contains the full detected text.
    if len(annotations) > 0:
        text = annotations[0].description
    else:
        text = ""
    print("Extracted text {} from image ({} chars).".format(text, len(text)))
    detect_language_response = translate_client.detect_language(text)
    src_lang = detect_language_response["language"]
    print("Detected language {} for text {}.".format(src_lang, text))
    # Submit a message to the bus for each target language
    to_langs = os.environ["TO_LANG"].split(",")
    for target_lang in to_langs:
        topic_name = os.environ["TRANSLATE_TOPIC"]
        # When the text is already in the target language, or detection
        # returned "und" (undefined), skip translation and publish directly
        # to the result topic.
        if src_lang == target_lang or src_lang == "und":
            topic_name = os.environ["RESULT_TOPIC"]
        message = {
            "text": text,
            "filename": filename,
            "lang": target_lang,
            "src_lang": src_lang,
        }
        message_data = json.dumps(message).encode("utf-8")
        topic_path = publisher.topic_path(project_id, topic_name)
        future = publisher.publish(topic_path, data=message_data)
        futures.append(future)
    # Block until every publish has been acknowledged (or raised).
    for future in futures:
        future.result()
Nächste Schritte
Informationen zum Suchen und Filtern von Codebeispielen für andere Google Cloud-Produkte finden Sie im Google Cloud-Beispielbrowser.