Cloud Storage ファイル内のエンティティの分析

Cloud Storage に保存されているファイル内のテキストに既知のエンティティ(著名人、ランドマークなどの固有名詞)がないか調べて、それらのエンティティに関する情報を返します。

このコードサンプルが含まれるドキュメント ページ

このコードサンプルが実際にどのような文脈で使われているかを確認するには、次のドキュメントをご覧ください。

コードサンプル

Go


// analyzeEntitiesFromGCS sends an entity-analysis request for the plain-text
// file located at gcsURI and returns the response and error from the client
// call unchanged.
//
// NOTE(review): client is not declared in this snippet; it is assumed to be an
// initialized Natural Language client available in the enclosing package.
func analyzeEntitiesFromGCS(ctx context.Context, gcsURI string) (*languagepb.AnalyzeEntitiesResponse, error) {
	// The document refers to the Cloud Storage object instead of carrying
	// inline content.
	doc := &languagepb.Document{
		Source: &languagepb.Document_GcsContentUri{GcsContentUri: gcsURI},
		Type:   languagepb.Document_PLAIN_TEXT,
	}

	req := &languagepb.AnalyzeEntitiesRequest{
		Document:     doc,
		EncodingType: languagepb.EncodingType_UTF8,
	}

	return client.AnalyzeEntities(ctx, req)
}

Java

// Create the Natural Language client (com.google.cloud.language.v1.LanguageServiceClient).
// The try-with-resources statement closes the client when the block exits.
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // Point the document at the Cloud Storage object to analyze.
  // gcsUri is expected to be defined by the surrounding code.
  Document doc =
      Document.newBuilder().setGcsContentUri(gcsUri).setType(Type.PLAIN_TEXT).build();
  AnalyzeEntitiesRequest request =
      AnalyzeEntitiesRequest.newBuilder()
          .setDocument(doc)
          .setEncodingType(EncodingType.UTF16)
          .build();

  AnalyzeEntitiesResponse response = language.analyzeEntities(request);

  // Print every entity with its salience, metadata and mentions
  for (Entity entity : response.getEntitiesList()) {
    System.out.printf("Entity: %s\n", entity.getName());
    System.out.printf("Salience: %.3f\n", entity.getSalience());
    System.out.println("Metadata: ");
    for (Map.Entry<String, String> entry : entity.getMetadataMap().entrySet()) {
      // End each pair with a newline; without it all metadata entries and the
      // first "Begin offset" line are printed on a single line.
      System.out.printf("%s : %s\n", entry.getKey(), entry.getValue());
    }
    for (EntityMention mention : entity.getMentionsList()) {
      System.out.printf("Begin offset: %d\n", mention.getText().getBeginOffset());
      System.out.printf("Content: %s\n", mention.getText().getContent());
      // The extra newline leaves a blank line between mentions
      System.out.printf("Type: %s\n\n", mention.getType());
    }
  }
}

Node.js

// Loads the Google Cloud Natural Language client library
const language = require('@google-cloud/language');

// Instantiates the client used for the request below
const client = new language.LanguageServiceClient();

/**
 * TODO(developer): Uncomment the following lines and fill in your own values
 * before running this code
 */
// const bucketName = 'Your bucket name, e.g. my-bucket';
// const fileName = 'Your file name, e.g. my-file.txt';

// Describes the plain-text file in Cloud Storage that should be analyzed
const gcsContentUri = `gs://${bucketName}/${fileName}`;
const document = {gcsContentUri, type: 'PLAIN_TEXT'};

// Requests entity analysis; the response is the first element of the resolved array
const [result] = await client.analyzeEntities({document});
const {entities} = result;

console.log('Entities:');
for (const entity of entities) {
  const {name, type, salience, metadata} = entity;
  console.log(name);
  console.log(` - Type: ${type}, Salience: ${salience}`);

  // Only print the Wikipedia link when the entity carries one
  const wikipediaUrl = metadata && metadata.wikipedia_url;
  if (wikipediaUrl) {
    console.log(` - Wikipedia URL: ${wikipediaUrl}`);
  }
}

PHP

use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;
use Google\Cloud\Language\V1\Entity\Type as EntityType;

/** Uncomment and populate these variables in your code */
// $uri = 'The cloud storage object to analyze (gs://your-bucket-name/your-object-name)';

// Instantiate the Natural Language client; it is closed in the finally block below
$languageServiceClient = new LanguageServiceClient();
try {
    // Describe the plain-text Cloud Storage object that should be analyzed
    $document = new Document();
    $document->setGcsContentUri($uri);
    $document->setType(Type::PLAIN_TEXT);

    // Run entity analysis on the document
    $response = $languageServiceClient->analyzeEntities($document, []);
    $entities = $response->getEntities();

    // Report each entity, followed by an empty line as separator
    foreach ($entities as $entity) {
        printf('Name: %s' . PHP_EOL, $entity->getName());
        printf('Type: %s' . PHP_EOL, EntityType::name($entity->getType()));
        printf('Salience: %s' . PHP_EOL, $entity->getSalience());

        // Metadata entries are optional, so each key is checked before it is read
        $metadata = $entity->getMetadata();
        if ($metadata->offsetExists('wikipedia_url')) {
            printf('Wikipedia URL: %s' . PHP_EOL, $metadata->offsetGet('wikipedia_url'));
        }
        if ($metadata->offsetExists('mid')) {
            printf('Knowledge Graph MID: %s' . PHP_EOL, $metadata->offsetGet('mid'));
        }
        printf(PHP_EOL);
    }
} finally {
    // Always release the client, even when the request fails
    $languageServiceClient->close();
}

Python

from google.cloud import language_v1

def sample_analyze_entities(gcs_content_uri):
    """
    Analyze the entities in a text file stored in Cloud Storage and print them.

    Args:
      gcs_content_uri: Google Cloud Storage URI of the file to analyze,
        e.g. gs://[Your Bucket]/[Path to File]
    """

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/entity.txt'

    document = {
        "gcs_content_uri": gcs_content_uri,
        # Available types: PLAIN_TEXT, HTML
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        # Optional. If not specified, the language is automatically detected.
        # For list of supported languages:
        # https://cloud.google.com/natural-language/docs/languages
        "language": "en",
    }
    request = {
        "document": document,
        # Available values: NONE, UTF8, UTF16, UTF32
        "encoding_type": language_v1.EncodingType.UTF8,
    }

    response = client.analyze_entities(request=request)

    # Report every entity returned by the API
    for entity in response.entities:
        print("Representative name for the entity: {}".format(entity.name))

        # Entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al
        entity_type = language_v1.Entity.Type(entity.type_)
        print("Entity type: {}".format(entity_type.name))

        # Salience score of the entity, in the [0, 1.0] range
        print("Salience score: {}".format(entity.salience))

        # Metadata attached to the entity. For many known entities this is a
        # Wikipedia URL (wikipedia_url) and a Knowledge Graph MID (mid); some
        # entity types carry more, e.g. ADDRESS entities may have
        # street_name, postal_code, et al.
        for key, value in entity.metadata.items():
            print("{}: {}".format(key, value))

        # Mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        for mention in entity.mentions:
            print("Mention text: {}".format(mention.text.content))

            # Mention type, e.g. PROPER for proper noun
            mention_type = language_v1.EntityMention.Type(mention.type_)
            print("Mention type: {}".format(mention_type.name))

    # Language of the text: the one specified in the request or, if none was
    # specified, the automatically-detected language.
    print("Language of the text: {}".format(response.language))

次のステップ

他の Google Cloud プロダクトに関連するコードサンプルを検索したり、条件で絞り込んだりするには、Google Cloud のサンプルをご覧ください。