Classifying Content

Content Classification analyzes a document and returns a list of content categories that apply to the text found in the document. To classify the content in a document, call the classifyText method.

A complete list of the content categories returned by the classifyText method can be found here.

This section demonstrates how to classify content in a document.

Classifying Content

The following is an example of classifying content provided as a string.

Protocol

To classify content in a document, make a POST request to the documents:classifyText REST method and provide the appropriate request body as shown in the following example.

This example uses the gcloud auth application-default print-access-token command to obtain an access token for a service account set up for the project using the Google Cloud Platform Cloud SDK. For instructions on installing the Cloud SDK and setting up a project with a service account, see the Quickstart.

curl -X POST \
     -H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
     -H "Content-Type: application/json; charset=utf-8" \
     --data "{
  'document':{
    'type':'PLAIN_TEXT',
    'content':'Google, headquartered in Mountain View, unveiled the new Android
    phone at the Consumer Electronic Show.  Sundar Pichai said in his keynote
    that users love their new Android phones.'
  }
}" "https://language.googleapis.com/v1/documents:classifyText"

C#

private static void ClassifyTextFromText(string text)
{
    var client = LanguageServiceClient.Create();
    var response = client.ClassifyText(new Document()
    {
        Content = text,
        Type = Document.Types.Type.PlainText
    });
    WriteCategories(response.Categories);
}

private static void WriteCategories(IEnumerable<ClassificationCategory> categories)
{
    Console.WriteLine("Categories:");
    foreach (var category in categories)
    {
        Console.WriteLine($"\tCategory: {category.Name}");
        Console.WriteLine($"\t\tConfidence: {category.Confidence}");
    }
}

Go


func classifyText(ctx context.Context, client *language.Client, text string) (*languagepb.ClassifyTextResponse, error) {
	return client.ClassifyText(ctx, &languagepb.ClassifyTextRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_Content{
				Content: text,
			},
			Type: languagepb.Document_PLAIN_TEXT,
		},
	})
}

Java

// Instantiate the Language client com.google.cloud.language.v1.LanguageServiceClient
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // set content to the text string
  Document doc = Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build();
  ClassifyTextRequest request = ClassifyTextRequest.newBuilder().setDocument(doc).build();
  // detect categories in the given text
  ClassifyTextResponse response = language.classifyText(request);

  for (ClassificationCategory category : response.getCategoriesList()) {
    System.out.printf(
        "Category name : %s, Confidence : %.3f\n",
        category.getName(), category.getConfidence());
  }
}

Node.js

// Imports the Google Cloud client library
const language = require('@google-cloud/language');

// Creates a client
const client = new language.LanguageServiceClient();

/**
 * TODO(developer): Uncomment the following line to run this code.
 */
// const text = 'Your text to analyze, e.g. Hello, world!';

// Prepares a document, representing the provided text
const document = {
  content: text,
  type: 'PLAIN_TEXT',
};

// Classifies text in the document
const [classification] = await client.classifyText({document});
console.log('Categories:');
classification.categories.forEach(category => {
  console.log(`Name: ${category.name}, Confidence: ${category.confidence}`);
});

Python

from google.cloud import language_v1
from google.cloud.language_v1 import enums

def sample_classify_text(text_content):
    """
    Classifying Content in a String

    Args:
      text_content: The text content to analyze. Must include at least 20 words.
    """

    client = language_v1.LanguageServiceClient()

    # text_content = 'That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows.'

    # Available types: PLAIN_TEXT, HTML
    type_ = enums.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "en"
    document = {"content": text_content, "type": type_, "language": language}

    response = client.classify_text(document)
    # Loop through classified categories returned from the API
    for category in response.categories:
        # Get the name of the category representing the document.
        # See the predefined taxonomy of categories:
        # https://cloud.google.com/natural-language/docs/categories
        print(u"Category name: {}".format(category.name))
        # Get the confidence. Number representing how certain the classifier
        # is that this category represents the provided text.
        print(u"Confidence: {}".format(category.confidence))

PHP

use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;

/** Uncomment and populate these variables in your code */
// $text = 'The text to analyze.';

// Make sure we have enough words (20+) to call classifyText
if (str_word_count($text) < 20) {
    printf('20+ words are required to classify text.' . PHP_EOL);
    return;
}
$languageServiceClient = new LanguageServiceClient();
try {
    // Create a new Document, add text as content and set type to PLAIN_TEXT
    $document = (new Document())
        ->setContent($text)
        ->setType(Type::PLAIN_TEXT);

    // Call the classifyText function
    $response = $languageServiceClient->classifyText($document);
    $categories = $response->getCategories();
    // Print document information
    foreach ($categories as $category) {
        printf('Category Name: %s' . PHP_EOL, $category->getName());
        printf('Confidence: %s' . PHP_EOL, $category->getConfidence());
        print(PHP_EOL);
    }
} finally {
    $languageServiceClient->close();
}

Ruby

# text_content = "Text to classify"

require "google/cloud/language"

# Create the Natural Language API client
language = Google::Cloud::Language.language_service

# Build a plain-text document from the provided string
document = { content: text_content, type: :PLAIN_TEXT }
response = language.classify_text document: document

categories = response.categories

# Print each category returned for the document
categories.each do |category|
  puts "Name: #{category.name} Confidence: #{category.confidence}"
end

Classifying Content from Google Cloud Storage

The following is an example of classifying content stored in a text file on Google Cloud Storage.

Protocol

To classify the content of a document stored in Google Cloud Storage, make a POST request to the documents:classifyText REST method and provide the appropriate request body with the path to the document, as shown in the following example.

curl -X POST \
     -H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
     -H "Content-Type: application/json; charset=utf-8" \
     --data "{
  'document':{
    'type':'PLAIN_TEXT',
    'gcsContentUri':'gs://<bucket-name>/<object-name>'
  }
}" "https://language.googleapis.com/v1/documents:classifyText"

C#

private static void ClassifyTextFromFile(string gcsUri)
{
    var client = LanguageServiceClient.Create();
    var response = client.ClassifyText(new Document()
    {
        GcsContentUri = gcsUri,
        Type = Document.Types.Type.PlainText
    });
    WriteCategories(response.Categories);
}
private static void WriteCategories(IEnumerable<ClassificationCategory> categories)
{
    Console.WriteLine("Categories:");
    foreach (var category in categories)
    {
        Console.WriteLine($"\tCategory: {category.Name}");
        Console.WriteLine($"\t\tConfidence: {category.Confidence}");
    }
}

Go


func classifyTextFromGCS(ctx context.Context, client *language.Client, gcsURI string) (*languagepb.ClassifyTextResponse, error) {
	return client.ClassifyText(ctx, &languagepb.ClassifyTextRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_GcsContentUri{
				GcsContentUri: gcsURI,
			},
			Type: languagepb.Document_PLAIN_TEXT,
		},
	})
}

Java

// Instantiate the Language client com.google.cloud.language.v1.LanguageServiceClient
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // set the GCS content URI path
  Document doc =
      Document.newBuilder().setGcsContentUri(gcsUri).setType(Type.PLAIN_TEXT).build();
  ClassifyTextRequest request = ClassifyTextRequest.newBuilder().setDocument(doc).build();
  // detect categories in the given file
  ClassifyTextResponse response = language.classifyText(request);

  for (ClassificationCategory category : response.getCategoriesList()) {
    System.out.printf(
        "Category name : %s, Confidence : %.3f\n",
        category.getName(), category.getConfidence());
  }
}

Node.js

// Imports the Google Cloud client library.
const language = require('@google-cloud/language');

// Creates a client.
const client = new language.LanguageServiceClient();

/**
 * TODO(developer): Uncomment the following lines to run this code
 */
// const bucketName = 'Your bucket name, e.g. my-bucket';
// const fileName = 'Your file name, e.g. my-file.txt';

// Prepares a document, representing a text file in Cloud Storage
const document = {
  gcsContentUri: `gs://${bucketName}/${fileName}`,
  type: 'PLAIN_TEXT',
};

// Classifies text in the document
const [classification] = await client.classifyText({document});

console.log('Categories:');
classification.categories.forEach(category => {
  console.log(`Name: ${category.name}, Confidence: ${category.confidence}`);
});

Python

from google.cloud import language_v1
from google.cloud.language_v1 import enums

def sample_classify_text(gcs_content_uri):
    """
    Classifying Content in text file stored in Cloud Storage

    Args:
      gcs_content_uri: Google Cloud Storage URI where the file content is located,
        e.g. gs://[Your Bucket]/[Path to File]. The text file must include at least 20 words.
    """

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/classify-entertainment.txt'

    # Available types: PLAIN_TEXT, HTML
    type_ = enums.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "en"
    document = {"gcs_content_uri": gcs_content_uri, "type": type_, "language": language}

    response = client.classify_text(document)
    # Loop through classified categories returned from the API
    for category in response.categories:
        # Get the name of the category representing the document.
        # See the predefined taxonomy of categories:
        # https://cloud.google.com/natural-language/docs/categories
        print(u"Category name: {}".format(category.name))
        # Get the confidence. Number representing how certain the classifier
        # is that this category represents the provided text.
        print(u"Confidence: {}".format(category.confidence))

PHP

use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;

/** Uncomment and populate these variables in your code */
// $uri = 'The cloud storage object to analyze (gs://your-bucket-name/your-object-name)';

$languageServiceClient = new LanguageServiceClient();
try {
    // Create a new Document, pass GCS URI and set type to PLAIN_TEXT
    $document = (new Document())
        ->setGcsContentUri($uri)
        ->setType(Type::PLAIN_TEXT);

    // Call the classifyText function
    $response = $languageServiceClient->classifyText($document);
    $categories = $response->getCategories();
    // Print document information
    foreach ($categories as $category) {
        printf('Category Name: %s' . PHP_EOL, $category->getName());
        printf('Confidence: %s' . PHP_EOL, $category->getConfidence());
        print(PHP_EOL);
    }
} finally {
    $languageServiceClient->close();
}

Ruby

# storage_path = "Path to file in Google Cloud Storage, eg. gs://bucket/file"

require "google/cloud/language"

# Create the Natural Language API client
language = Google::Cloud::Language.language_service

# Build a plain-text document that references the file in Cloud Storage
document = { gcs_content_uri: storage_path, type: :PLAIN_TEXT }
response = language.classify_text document: document

categories = response.categories

# Print each category returned for the document
categories.each do |category|
  puts "Name: #{category.name} Confidence: #{category.confidence}"
end