Cloud Storage ファイルのエンティティ分析

コレクションでコンテンツを整理 必要に応じて、コンテンツの保存と分類を行います。

Cloud Storage に保存されているファイル内のテキストに既知のエンティティ(著名人、ランドマークなどの固有名詞)がないか調べて、それらのエンティティに関する情報を返します。





func analyzeEntitiesFromGCS(ctx context.Context, gcsURI string) (*languagepb.AnalyzeEntitiesResponse, error) {
	return client.AnalyzeEntities(ctx, &languagepb.AnalyzeEntitiesRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_GcsContentUri{
				GcsContentUri: gcsURI,
			Type: languagepb.Document_PLAIN_TEXT,
		EncodingType: languagepb.EncodingType_UTF8,


// Instantiate the Language client
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // Set the GCS Content URI path to the file to be analyzed
  Document doc =
  AnalyzeEntitiesRequest request =

  AnalyzeEntitiesResponse response = language.analyzeEntities(request);

  // Print the response
  for (Entity entity : response.getEntitiesList()) {
    System.out.printf("Entity: %s\n", entity.getName());
    System.out.printf("Salience: %.3f\n", entity.getSalience());
    System.out.println("Metadata: ");
    for (Map.Entry<String, String> entry : entity.getMetadataMap().entrySet()) {
      System.out.printf("%s : %s", entry.getKey(), entry.getValue());
    for (EntityMention mention : entity.getMentionsList()) {
      System.out.printf("Begin offset: %d\n", mention.getText().getBeginOffset());
      System.out.printf("Content: %s\n", mention.getText().getContent());
      System.out.printf("Type: %s\n\n", mention.getType());


// Imports the Google Cloud client library
const language = require('@google-cloud/language');

// Creates a client
const client = new language.LanguageServiceClient();

 * TODO(developer): Uncomment the following lines to run this code
// const bucketName = 'Your bucket name, e.g. my-bucket';
// const fileName = 'Your file name, e.g. my-file.txt';

// Prepares a document, representing a text file in Cloud Storage
const document = {
  gcsContentUri: `gs://${bucketName}/${fileName}`,
  type: 'PLAIN_TEXT',

// Detects entities in the document
const [result] = await client.analyzeEntities({document});
const entities = result.entities;

entities.forEach(entity => {
  console.log(` - Type: ${entity.type}, Salience: ${entity.salience}`);
  if (entity.metadata && entity.metadata.wikipedia_url) {
    console.log(` - Wikipedia URL: ${entity.metadata.wikipedia_url}`);


use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;
use Google\Cloud\Language\V1\Entity\Type as EntityType;

 * @param string $uri The cloud storage object to analyze (gs://your-bucket-name/your-object-name)
function analyze_entities_from_file(string $uri): void
    // Create the Natural Language client
    $languageServiceClient = new LanguageServiceClient();

    // Create a new Document, pass GCS URI and set type to PLAIN_TEXT
    $document = (new Document())

    // Call the analyzeEntities function
    $response = $languageServiceClient->analyzeEntities($document, []);
    $entities = $response->getEntities();
    // Print out information about each entity
    foreach ($entities as $entity) {
        printf('Name: %s' . PHP_EOL, $entity->getName());
        printf('Type: %s' . PHP_EOL, EntityType::name($entity->getType()));
        printf('Salience: %s' . PHP_EOL, $entity->getSalience());
        if ($entity->getMetadata()->offsetExists('wikipedia_url')) {
            printf('Wikipedia URL: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('wikipedia_url'));
        if ($entity->getMetadata()->offsetExists('mid')) {
            printf('Knowledge Graph MID: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('mid'));


from import language_v1

def sample_analyze_entities(gcs_content_uri):
    Analyzing Entities in text file stored in Cloud Storage

      gcs_content_uri Google Cloud Storage URI where the file content is located.
      e.g. gs://[Your Bucket]/[Path to File]

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/entity.txt'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    language = "en"
    document = {
        "gcs_content_uri": gcs_content_uri,
        "type_": type_,
        "language": language,

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_entities(
        request={"document": document, "encoding_type": encoding_type}
    # Loop through entitites returned from the API
    for entity in response.entities:
        print("Representative name for the entity: {}".format(
        # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al
        print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name))
        # Get the salience score associated with the entity in the [0, 1.0] range
        print("Salience score: {}".format(entity.salience))
        # Loop over the metadata associated with entity. For many known entities,
        # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid).
        # Some entity types may have additional metadata, e.g. ADDRESS entities
        # may have metadata for the address street_name, postal_code, et al.
        for metadata_name, metadata_value in entity.metadata.items():
            print("{}: {}".format(metadata_name, metadata_value))

        # Loop over the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        for mention in entity.mentions:
            print("Mention text: {}".format(mention.text.content))
            # Get the mention type, e.g. PROPER for proper noun
                "Mention type: {}".format(

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print("Language of the text: {}".format(response.language))


他の Google Cloud プロダクトに関連するコードサンプルの検索およびフィルタ検索を行うには、Google Cloud のサンプルをご覧ください。