Breaks the given text into a series of sentences and tokens (generally, words) and provides linguistic information about those tokens.
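As a quick illustration, a single syntax-analysis call returns one entry per token, each carrying its part-of-speech tag, lemma, and dependency edge. The following minimal Python sketch assumes the google-cloud-language client library is installed and credentials are configured; the full, commented samples for each supported language follow below.

from google.cloud import language_v1

client = language_v1.LanguageServiceClient()
document = {
    "content": "The quick brown fox jumped over the lazy dog.",  # any plain-text string
    "type_": language_v1.Document.Type.PLAIN_TEXT,
}
response = client.analyze_syntax(
    request={"document": document, "encoding_type": language_v1.EncodingType.UTF8}
)
for token in response.tokens:
    # Each token carries its text, part-of-speech data, lemma, and dependency edge.
    print(
        token.text.content,
        language_v1.PartOfSpeech.Tag(token.part_of_speech.tag).name,
        token.lemma,
    )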
More information
For detailed documentation that includes this code sample, see the Natural Language API documentation.
Code sample
Go
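// analyzeSyntax sends an AnnotateText request with the ExtractSyntax feature
// enabled and returns the annotated response (sentences, tokens, parts of
// speech, and dependency edges) for the given plain-text string.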
func analyzeSyntax(ctx context.Context, client *language.Client, text string) (*languagepb.AnnotateTextResponse, error) {
	return client.AnnotateText(ctx, &languagepb.AnnotateTextRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_Content{
				Content: text,
			},
			Type: languagepb.Document_PLAIN_TEXT,
		},
		Features: &languagepb.AnnotateTextRequest_Features{
			ExtractSyntax: true,
		},
		EncodingType: languagepb.EncodingType_UTF8,
	})
}
Java
// Instantiate the Language client (com.google.cloud.language.v1.LanguageServiceClient)
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  Document doc = Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build();
  AnalyzeSyntaxRequest request =
      AnalyzeSyntaxRequest.newBuilder()
          .setDocument(doc)
          .setEncodingType(EncodingType.UTF16)
          .build();
  // Analyze the syntax in the given text
  AnalyzeSyntaxResponse response = language.analyzeSyntax(request);
  // Print the response
  for (Token token : response.getTokensList()) {
    System.out.printf("\tText: %s\n", token.getText().getContent());
    System.out.printf("\tBeginOffset: %d\n", token.getText().getBeginOffset());
    System.out.printf("Lemma: %s\n", token.getLemma());
    System.out.printf("PartOfSpeechTag: %s\n", token.getPartOfSpeech().getTag());
    System.out.printf("\tAspect: %s\n", token.getPartOfSpeech().getAspect());
    System.out.printf("\tCase: %s\n", token.getPartOfSpeech().getCase());
    System.out.printf("\tForm: %s\n", token.getPartOfSpeech().getForm());
    System.out.printf("\tGender: %s\n", token.getPartOfSpeech().getGender());
    System.out.printf("\tMood: %s\n", token.getPartOfSpeech().getMood());
    System.out.printf("\tNumber: %s\n", token.getPartOfSpeech().getNumber());
    System.out.printf("\tPerson: %s\n", token.getPartOfSpeech().getPerson());
    System.out.printf("\tProper: %s\n", token.getPartOfSpeech().getProper());
    System.out.printf("\tReciprocity: %s\n", token.getPartOfSpeech().getReciprocity());
    System.out.printf("\tTense: %s\n", token.getPartOfSpeech().getTense());
    System.out.printf("\tVoice: %s\n", token.getPartOfSpeech().getVoice());
    System.out.println("DependencyEdge");
    System.out.printf("\tHeadTokenIndex: %d\n", token.getDependencyEdge().getHeadTokenIndex());
    System.out.printf("\tLabel: %s\n\n", token.getDependencyEdge().getLabel());
  }
  return response.getTokensList();
}
Node.js
// Imports the Google Cloud client library
const language = require('@google-cloud/language');

// Creates a client
const client = new language.LanguageServiceClient();

/**
 * TODO(developer): Uncomment the following line to run this code.
 */
// const text = 'Your text to analyze, e.g. Hello, world!';

// Prepares a document, representing the provided text
const document = {
  content: text,
  type: 'PLAIN_TEXT',
};

// Need to specify an encodingType to receive word offsets
const encodingType = 'UTF8';

// Detects syntax in the document
const [syntax] = await client.analyzeSyntax({document, encodingType});

console.log('Tokens:');
syntax.tokens.forEach(part => {
  console.log(`${part.partOfSpeech.tag}: ${part.text.content}`);
  console.log('Morphology:', part.partOfSpeech);
});
PHP
use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;
use Google\Cloud\Language\V1\PartOfSpeech\Tag;

/**
 * @param string $text The text to analyze
 */
function analyze_syntax(string $text): void
{
    // Create the Natural Language client
    $languageServiceClient = new LanguageServiceClient();

    // Create a new Document, add text as content and set type to PLAIN_TEXT
    $document = (new Document())
        ->setContent($text)
        ->setType(Type::PLAIN_TEXT);

    // Call the analyzeSyntax method
    $response = $languageServiceClient->analyzeSyntax($document, []);
    $tokens = $response->getTokens();

    // Print out information about each token
    foreach ($tokens as $token) {
        printf('Token text: %s' . PHP_EOL, $token->getText()->getContent());
        printf('Token part of speech: %s' . PHP_EOL, Tag::name($token->getPartOfSpeech()->getTag()));
        print(PHP_EOL);
    }
}
Python
from google.cloud import language_v1


def sample_analyze_syntax(text_content):
    """
    Analyzing Syntax in a String

    Args:
      text_content The text content to analyze
    """

    client = language_v1.LanguageServiceClient()

    # text_content = 'This is a short sentence.'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "en"
    document = {"content": text_content, "type_": type_, "language": language}

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_syntax(
        request={"document": document, "encoding_type": encoding_type}
    )
    # Loop through tokens returned from the API
    for token in response.tokens:
        # Get the text content of this token. Usually a word or punctuation.
        text = token.text
        print("Token text: {}".format(text.content))
        print(
            "Location of this token in overall document: {}".format(text.begin_offset)
        )

        # Get the part of speech information for this token.
        # Part of speech is defined in:
        # http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
        part_of_speech = token.part_of_speech
        # Get the tag, e.g. NOUN, ADJ for Adjective, et al.
        print(
            "Part of Speech tag: {}".format(
                language_v1.PartOfSpeech.Tag(part_of_speech.tag).name
            )
        )
        # Get the voice, e.g. ACTIVE or PASSIVE
        print(
            "Voice: {}".format(
                language_v1.PartOfSpeech.Voice(part_of_speech.voice).name
            )
        )
        # Get the tense, e.g. PAST, FUTURE, PRESENT, et al.
        print(
            "Tense: {}".format(
                language_v1.PartOfSpeech.Tense(part_of_speech.tense).name
            )
        )
        # See API reference for additional Part of Speech information available
        # Get the lemma of the token. Wikipedia lemma description
        # https://en.wikipedia.org/wiki/Lemma_(morphology)
        print("Lemma: {}".format(token.lemma))

        # Get the dependency tree parse information for this token.
        # For more information on dependency labels:
        # http://www.aclweb.org/anthology/P13-2017
        dependency_edge = token.dependency_edge
        print("Head token index: {}".format(dependency_edge.head_token_index))
        print(
            "Label: {}".format(
                language_v1.DependencyEdge.Label(dependency_edge.label).name
            )
        )

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print("Language of the text: {}".format(response.language))
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.