Cloud Storage의 파일 배치에 주석 추가(베타)

Cloud Storage에 있는 파일 배치에 온라인으로 주석을 추가합니다.

코드 샘플

Java

이 샘플을 시도해 보기 전에 Vision 빠른 시작: 클라이언트 라이브러리 사용의 Java 설정 안내를 따르세요. 자세한 내용은 Vision Java API 참조 문서를 확인하세요.

import com.google.api.core.ApiFuture;
import com.google.cloud.vision.v1p4beta1.AnnotateFileRequest;
import com.google.cloud.vision.v1p4beta1.AnnotateFileResponse;
import com.google.cloud.vision.v1p4beta1.BatchAnnotateFilesRequest;
import com.google.cloud.vision.v1p4beta1.BatchAnnotateFilesResponse;
import com.google.cloud.vision.v1p4beta1.Block;
import com.google.cloud.vision.v1p4beta1.Feature;
import com.google.cloud.vision.v1p4beta1.Feature.Type;
import com.google.cloud.vision.v1p4beta1.GcsSource;
import com.google.cloud.vision.v1p4beta1.ImageAnnotatorClient;
import com.google.cloud.vision.v1p4beta1.InputConfig;
import com.google.cloud.vision.v1p4beta1.Page;
import com.google.cloud.vision.v1p4beta1.Paragraph;
import com.google.cloud.vision.v1p4beta1.Symbol;
import com.google.cloud.vision.v1p4beta1.TextAnnotation;
import com.google.cloud.vision.v1p4beta1.Word;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DetectBatchAnnotateFilesGcs {

  /**
   * Performs document text detection on a remote PDF/TIFF/GIF file on Google Cloud Storage and
   * prints the detected symbols, words and paragraphs to standard output.
   *
   * <p>Every annotation response returned for the file is printed, not only the first one, so
   * that all requested pages show up in the output.
   *
   * <p>Failures are reported on standard output instead of being propagated to the caller.
   *
   * @param gcsPath Cloud Storage URI of the input file, e.g.
   *     "gs://Your_BUCKET_ID/path_to_your_data"
   */
  public static void detectBatchAnnotateFilesGcs(String gcsPath) {
    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
      // Annotate the first two pages and the last one (max 5 pages)
      // First page starts at 1, and not 0. Last page is -1.
      List<Integer> pages = Arrays.asList(1, 2, -1);
      GcsSource gcsSource = GcsSource.newBuilder().setUri(gcsPath).build();
      Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
      // Other supported mime types : 'image/tiff' or 'image/gif'
      InputConfig inputConfig =
          InputConfig.newBuilder().setMimeType("application/pdf").setGcsSource(gcsSource).build();
      AnnotateFileRequest request =
          AnnotateFileRequest.newBuilder()
              .addFeatures(feat)
              .setInputConfig(inputConfig)
              .addAllPages(pages)
              .build();

      // The batch carries a single file request, so a single file response is expected back.
      BatchAnnotateFilesRequest batchAnnotateFilesRequest =
          BatchAnnotateFilesRequest.newBuilder().addRequests(request).build();
      ApiFuture<BatchAnnotateFilesResponse> future =
          client.batchAnnotateFilesCallable().futureCall(batchAnnotateFilesRequest);
      BatchAnnotateFilesResponse response = future.get();

      // Getting the response for the first (and only) file in the batch
      AnnotateFileResponse annotateFileResponse = response.getResponses(0);

      // For full list of available annotations, see http://g.co/cloud/vision/docs
      // Walk every nested response so no requested page is silently dropped.
      for (int i = 0; i < annotateFileResponse.getResponsesCount(); i++) {
        printTextAnnotation(annotateFileResponse.getResponses(i).getFullTextAnnotation());
      }

    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can still observe it.
      Thread.currentThread().interrupt();
      System.out.println("Error during detectBatchAnnotateFilesGcs: \n" + e.toString());
    } catch (Exception e) {
      // Sample code: report any failure (client creation, RPC, missing response) and return.
      System.out.println("Error during detectBatchAnnotateFilesGcs: \n" + e.toString());
    }
  }

  // Prints each symbol, word and paragraph of one text annotation, followed by its full text.
  private static void printTextAnnotation(TextAnnotation textAnnotation) {
    for (Page page : textAnnotation.getPagesList()) {
      for (Block block : page.getBlocksList()) {
        for (Paragraph para : block.getParagraphsList()) {
          StringBuilder paraText = new StringBuilder();
          for (Word word : para.getWordsList()) {
            StringBuilder wordText = new StringBuilder();
            for (Symbol symbol : word.getSymbolsList()) {
              wordText.append(symbol.getText());
              System.out.format(
                  "Symbol text: %s (Confidence: %f)\n", symbol.getText(), symbol.getConfidence());
            }
            System.out.format(
                "Word text: %s (Confidence: %f)\n\n", wordText, word.getConfidence());
            // Each word is preceded by a single space, including the first one.
            paraText.append(' ').append(wordText);
          }
          // Output Example using Paragraph:
          System.out.println("\nParagraph: \n" + paraText);
          System.out.format("Paragraph Confidence: %f\n", para.getConfidence());
        }
      }
    }
    System.out.println("\nComplete annotation:");
    System.out.println(textAnnotation.getText());
  }
}

Python

이 샘플을 시도해 보기 전에 Vision 빠른 시작: 클라이언트 라이브러리 사용의 Python 설정 안내를 따르세요. 자세한 내용은 Vision Python API 참조 문서를 확인하세요.

def detect_batch_annotate_files_uri(gcs_uri):
    """Print document-text annotations for a PDF/TIFF/GIF file in Cloud Storage.

    A single synchronous batch request is sent for the file. The PDF may have
    many pages, but this API processes at most 5 of them per request; here the
    first two pages and the last page are requested.

    For every block, paragraph, word and symbol found in the returned
    annotations, the confidence (and text, for words and symbols) is printed
    to standard output.

    Args:
        gcs_uri: The path to the file in Google Cloud Storage (gs://...).
    """
    from google.cloud import vision_v1p4beta1 as vision

    client = vision.ImageAnnotatorClient()

    # Other supported MIME types: 'image/tiff' or 'image/gif'.
    source = vision.GcsSource(uri=gcs_uri)
    input_config = vision.InputConfig(
        gcs_source=source, mime_type='application/pdf')

    text_detection = vision.Feature(
        type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)

    # Page numbering starts at 1 (not 0); -1 selects the last page.
    file_request = vision.AnnotateFileRequest(
        input_config=input_config,
        features=[text_detection],
        pages=[1, 2, -1])

    batch_response = client.batch_annotate_files(requests=[file_request])

    # Only one file was submitted, so only the first file response is read.
    first_file = batch_response.responses[0]
    for page_response in first_file.responses:
        for page in page_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for paragraph in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(
                        paragraph.confidence))
                    for word in paragraph.words:
                        # A word's text is the concatenation of its symbols.
                        joined = ''.join(
                            symbol.text for symbol in word.symbols)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            joined, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))

다음 단계

다른 Google Cloud 제품의 코드 샘플을 검색하고 필터링하려면 Google Cloud 샘플 브라우저를 참조하세요.