Analyze syntax of a string

Break up the given text into a series of sentences and tokens (generally, words) and provide linguistic information about those tokens.

Explore further

For detailed documentation that includes this code sample, see the Cloud Natural Language guide on analyzing syntax.

Code sample


func analyzeSyntax(ctx context.Context, client *language.Client, text string) (*languagepb.AnnotateTextResponse, error) {
	return client.AnnotateText(ctx, &languagepb.AnnotateTextRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_Content{
				Content: text,
			Type: languagepb.Document_PLAIN_TEXT,
		Features: &languagepb.AnnotateTextRequest_Features{
			ExtractSyntax: true,
		EncodingType: languagepb.EncodingType_UTF8,


// Instantiate the Language client. try-with-resources closes it when the block exits.
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // Wrap the input string in a plain-text Document.
  Document doc = Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build();
  // Build the syntax request for that document.
  // NOTE(review): the encoding type is assumed to be UTF16 so that the reported
  // begin offsets line up with Java String indices; confirm against the caller.
  AnalyzeSyntaxRequest request =
      AnalyzeSyntaxRequest.newBuilder()
          .setDocument(doc)
          .setEncodingType(EncodingType.UTF16)
          .build();
  // analyze the syntax in the given text
  AnalyzeSyntaxResponse response = language.analyzeSyntax(request);
  // print the response: one group of lines per token
  for (Token token : response.getTokensList()) {
    // Token text and its offset in the document.
    System.out.printf("\tText: %s\n", token.getText().getContent());
    System.out.printf("\tBeginOffset: %d\n", token.getText().getBeginOffset());
    System.out.printf("Lemma: %s\n", token.getLemma());
    // Part-of-speech tag followed by the individual morphology fields.
    System.out.printf("PartOfSpeechTag: %s\n", token.getPartOfSpeech().getTag());
    System.out.printf("\tAspect: %s\n", token.getPartOfSpeech().getAspect());
    System.out.printf("\tCase: %s\n", token.getPartOfSpeech().getCase());
    System.out.printf("\tForm: %s\n", token.getPartOfSpeech().getForm());
    System.out.printf("\tGender: %s\n", token.getPartOfSpeech().getGender());
    System.out.printf("\tMood: %s\n", token.getPartOfSpeech().getMood());
    System.out.printf("\tNumber: %s\n", token.getPartOfSpeech().getNumber());
    System.out.printf("\tPerson: %s\n", token.getPartOfSpeech().getPerson());
    System.out.printf("\tProper: %s\n", token.getPartOfSpeech().getProper());
    System.out.printf("\tReciprocity: %s\n", token.getPartOfSpeech().getReciprocity());
    System.out.printf("\tTense: %s\n", token.getPartOfSpeech().getTense());
    System.out.printf("\tVoice: %s\n", token.getPartOfSpeech().getVoice());
    // Dependency edge: index of the head token and the edge label.
    System.out.printf("\tHeadTokenIndex: %d\n", token.getDependencyEdge().getHeadTokenIndex());
    System.out.printf("\tLabel: %s\n\n", token.getDependencyEdge().getLabel());
  }
  // Hand the token list back to the caller of the enclosing method.
  return response.getTokensList();
}


// Imports the Google Cloud client library
const language = require('@google-cloud/language');

// Creates a client
const client = new language.LanguageServiceClient();

/**
 * TODO(developer): Uncomment the following line to run this code.
 */
// const text = 'Your text to analyze, e.g. Hello, world!';

// Prepares a document, representing the provided text
const document = {
  content: text,
  type: 'PLAIN_TEXT',
};

// Need to specify an encodingType to receive word offsets
const encodingType = 'UTF8';

// Detects the syntax of the document.
// NOTE: `await` is only valid inside an async function in a CommonJS
// (`require`-based) script, so run this snippet from within one.
const [syntax] = await client.analyzeSyntax({document, encodingType});

// Prints each token's part-of-speech tag and text, then its full
// part-of-speech (morphology) object.
syntax.tokens.forEach(part => {
  console.log(`${part.partOfSpeech.tag}: ${part.text.content}`);
  console.log('Morphology:', part.partOfSpeech);
});


use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;
use Google\Cloud\Language\V1\PartOfSpeech\Tag;

/** Uncomment and populate these variables in your code */
// $text = 'The text to analyze.';

// Create the Natural Language client
$languageServiceClient = new LanguageServiceClient();

try {
    // Create a new Document, add text as content and set type to PLAIN_TEXT
    $document = (new Document())
        ->setContent($text)
        ->setType(Type::PLAIN_TEXT);

    // Call the analyzeSyntax function
    $response = $languageServiceClient->analyzeSyntax($document, []);
    $tokens = $response->getTokens();
    // Print out the text and part-of-speech tag name of each token
    foreach ($tokens as $token) {
        printf('Token text: %s' . PHP_EOL, $token->getText()->getContent());
        printf('Token part of speech: %s' . PHP_EOL, Tag::name($token->getPartOfSpeech()->getTag()));
    }
} finally {
    // Release the client's resources whether or not the call succeeded
    $languageServiceClient->close();
}


from google.cloud import language_v1


def sample_analyze_syntax(text_content):
    """
    Analyzing Syntax in a String

    Sends the text to the Natural Language API and prints, for every token
    in the response, its text, offset, part-of-speech tag, voice, tense,
    lemma and dependency edge, followed by the language of the text.

    Args:
      text_content The text content to analyze
    """

    client = language_v1.LanguageServiceClient()

    # text_content = 'This is a short sentence.'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "en"
    document = {"content": text_content, "type_": type_, "language": language}

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_syntax(
        request={"document": document, "encoding_type": encoding_type}
    )
    # Loop through tokens returned from the API
    for token in response.tokens:
        # Get the text content of this token. Usually a word or punctuation.
        text = token.text
        print(u"Token text: {}".format(text.content))
        print(
            u"Location of this token in overall document: {}".format(text.begin_offset)
        )
        # Get the part of speech information for this token.
        # See the API reference for how part of speech is defined.
        part_of_speech = token.part_of_speech
        # Get the tag, e.g. NOUN, ADJ for Adjective, et al.
        print(
            u"Part of Speech tag: {}".format(
                language_v1.PartOfSpeech.Tag(part_of_speech.tag).name
            )
        )
        # Get the voice, e.g. ACTIVE or PASSIVE
        print(u"Voice: {}".format(language_v1.PartOfSpeech.Voice(part_of_speech.voice).name))
        # Get the tense, e.g. PAST, FUTURE, PRESENT, et al.
        print(u"Tense: {}".format(language_v1.PartOfSpeech.Tense(part_of_speech.tense).name))
        # See API reference for additional Part of Speech information available
        # Get the lemma of the token. Wikipedia lemma description:
        # https://en.wikipedia.org/wiki/Lemma_(morphology)
        print(u"Lemma: {}".format(token.lemma))
        # Get the dependency tree parse information for this token.
        # See the API reference for more information on dependency labels.
        dependency_edge = token.dependency_edge
        print(u"Head token index: {}".format(dependency_edge.head_token_index))
        print(
            u"Label: {}".format(language_v1.DependencyEdge.Label(dependency_edge.label).name)
        )

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print(u"Language of the text: {}".format(response.language))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.