Analyze entities in a Cloud Storage file

Stay organized with collections Save and categorize content based on your preferences.

Inspect text in a file stored in Cloud Storage for known entities (proper nouns such as public figures, and landmarks), and return information about those entities.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample


func analyzeEntitiesFromGCS(ctx context.Context, gcsURI string) (*languagepb.AnalyzeEntitiesResponse, error) {
	return client.AnalyzeEntities(ctx, &languagepb.AnalyzeEntitiesRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_GcsContentUri{
				GcsContentUri: gcsURI,
			Type: languagepb.Document_PLAIN_TEXT,
		EncodingType: languagepb.EncodingType_UTF8,


// Instantiate the Language client
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // Set the GCS Content URI path to the file to be analyzed
  Document doc =
  AnalyzeEntitiesRequest request =

  AnalyzeEntitiesResponse response = language.analyzeEntities(request);

  // Print the response
  for (Entity entity : response.getEntitiesList()) {
    System.out.printf("Entity: %s\n", entity.getName());
    System.out.printf("Salience: %.3f\n", entity.getSalience());
    System.out.println("Metadata: ");
    for (Map.Entry<String, String> entry : entity.getMetadataMap().entrySet()) {
      System.out.printf("%s : %s", entry.getKey(), entry.getValue());
    for (EntityMention mention : entity.getMentionsList()) {
      System.out.printf("Begin offset: %d\n", mention.getText().getBeginOffset());
      System.out.printf("Content: %s\n", mention.getText().getContent());
      System.out.printf("Type: %s\n\n", mention.getType());


// Imports the Google Cloud client library
const language = require('@google-cloud/language');

// Creates a client
const client = new language.LanguageServiceClient();

 * TODO(developer): Uncomment the following lines to run this code
// const bucketName = 'Your bucket name, e.g. my-bucket';
// const fileName = 'Your file name, e.g. my-file.txt';

// Prepares a document, representing a text file in Cloud Storage
const document = {
  gcsContentUri: `gs://${bucketName}/${fileName}`,
  type: 'PLAIN_TEXT',

// Detects entities in the document
const [result] = await client.analyzeEntities({document});
const entities = result.entities;

entities.forEach(entity => {
  console.log(` - Type: ${entity.type}, Salience: ${entity.salience}`);
  if (entity.metadata && entity.metadata.wikipedia_url) {
    console.log(` - Wikipedia URL: ${entity.metadata.wikipedia_url}`);


use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;
use Google\Cloud\Language\V1\Entity\Type as EntityType;

 * @param string $uri The cloud storage object to analyze (gs://your-bucket-name/your-object-name)
function analyze_entities_from_file(string $uri): void
    // Create the Natural Language client
    $languageServiceClient = new LanguageServiceClient();

    // Create a new Document, pass GCS URI and set type to PLAIN_TEXT
    $document = (new Document())

    // Call the analyzeEntities function
    $response = $languageServiceClient->analyzeEntities($document, []);
    $entities = $response->getEntities();
    // Print out information about each entity
    foreach ($entities as $entity) {
        printf('Name: %s' . PHP_EOL, $entity->getName());
        printf('Type: %s' . PHP_EOL, EntityType::name($entity->getType()));
        printf('Salience: %s' . PHP_EOL, $entity->getSalience());
        if ($entity->getMetadata()->offsetExists('wikipedia_url')) {
            printf('Wikipedia URL: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('wikipedia_url'));
        if ($entity->getMetadata()->offsetExists('mid')) {
            printf('Knowledge Graph MID: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('mid'));


from import language_v1

def sample_analyze_entities(gcs_content_uri):
    Analyzing Entities in text file stored in Cloud Storage

      gcs_content_uri Google Cloud Storage URI where the file content is located.
      e.g. gs://[Your Bucket]/[Path to File]

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/entity.txt'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    language = "en"
    document = {
        "gcs_content_uri": gcs_content_uri,
        "type_": type_,
        "language": language,

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_entities(
        request={"document": document, "encoding_type": encoding_type}
    # Loop through entitites returned from the API
    for entity in response.entities:
        print("Representative name for the entity: {}".format(
        # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al
        print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name))
        # Get the salience score associated with the entity in the [0, 1.0] range
        print("Salience score: {}".format(entity.salience))
        # Loop over the metadata associated with entity. For many known entities,
        # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid).
        # Some entity types may have additional metadata, e.g. ADDRESS entities
        # may have metadata for the address street_name, postal_code, et al.
        for metadata_name, metadata_value in entity.metadata.items():
            print("{}: {}".format(metadata_name, metadata_value))

        # Loop over the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        for mention in entity.mentions:
            print("Mention text: {}".format(mention.text.content))
            # Get the mention type, e.g. PROPER for proper noun
                "Mention type: {}".format(

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print("Language of the text: {}".format(response.language))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.