Breaks the given text into a series of sentences and tokens (usually words) and provides linguistic information about those tokens.
Explore further
For detailed documentation that includes this code sample, see the following:
Code samples
Go
To authenticate to Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
func analyzeSyntax(ctx context.Context, client *language.Client, text string) (*languagepb.AnnotateTextResponse, error) {
	return client.AnnotateText(ctx, &languagepb.AnnotateTextRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_Content{
				Content: text,
			},
			Type: languagepb.Document_PLAIN_TEXT,
		},
		Features: &languagepb.AnnotateTextRequest_Features{
			ExtractSyntax: true,
		},
		EncodingType: languagepb.EncodingType_UTF8,
	})
}
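Unlike the other samples on this page, the Go function above takes an existing client and returns the raw response without printing it. The following is a minimal sketch, not part of the official sample, of how it might be wired up. It assumes the v1 client package cloud.google.com/go/language/apiv1 (the languagepb import path used by the helper itself varies by library version) and relies on the client picking up Application Default Credentials automatically; the token fields it prints are read through the generated protobuf getters.

package main

import (
	"context"
	"fmt"
	"log"

	// Assumed import path for the v1 client; adjust to your library version.
	language "cloud.google.com/go/language/apiv1"
)

func main() {
	ctx := context.Background()

	// NewClient uses Application Default Credentials unless other options are passed.
	client, err := language.NewClient(ctx)
	if err != nil {
		log.Fatalf("language.NewClient: %v", err)
	}
	defer client.Close()

	// Call the analyzeSyntax helper defined in the sample above.
	resp, err := analyzeSyntax(ctx, client, "The quick brown fox jumped over the lazy dog.")
	if err != nil {
		log.Fatalf("analyzeSyntax: %v", err)
	}

	// Each token carries its text, lemma, part-of-speech tag, and dependency edge.
	for _, token := range resp.GetTokens() {
		fmt.Printf("%s\tlemma: %s\ttag: %v\thead: %d\tlabel: %v\n",
			token.GetText().GetContent(),
			token.GetLemma(),
			token.GetPartOfSpeech().GetTag(),
			token.GetDependencyEdge().GetHeadTokenIndex(),
			token.GetDependencyEdge().GetLabel(),
		)
	}
}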
Java
To authenticate to Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
// Instantiate the Language client com.google.cloud.language.v1.LanguageServiceClient
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  Document doc = Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build();
  AnalyzeSyntaxRequest request =
      AnalyzeSyntaxRequest.newBuilder()
          .setDocument(doc)
          .setEncodingType(EncodingType.UTF16)
          .build();
  // Analyze the syntax in the given text
  AnalyzeSyntaxResponse response = language.analyzeSyntax(request);
  // Print the response
  for (Token token : response.getTokensList()) {
    System.out.printf("\tText: %s\n", token.getText().getContent());
    System.out.printf("\tBeginOffset: %d\n", token.getText().getBeginOffset());
    System.out.printf("Lemma: %s\n", token.getLemma());
    System.out.printf("PartOfSpeechTag: %s\n", token.getPartOfSpeech().getTag());
    System.out.printf("\tAspect: %s\n", token.getPartOfSpeech().getAspect());
    System.out.printf("\tCase: %s\n", token.getPartOfSpeech().getCase());
    System.out.printf("\tForm: %s\n", token.getPartOfSpeech().getForm());
    System.out.printf("\tGender: %s\n", token.getPartOfSpeech().getGender());
    System.out.printf("\tMood: %s\n", token.getPartOfSpeech().getMood());
    System.out.printf("\tNumber: %s\n", token.getPartOfSpeech().getNumber());
    System.out.printf("\tPerson: %s\n", token.getPartOfSpeech().getPerson());
    System.out.printf("\tProper: %s\n", token.getPartOfSpeech().getProper());
    System.out.printf("\tReciprocity: %s\n", token.getPartOfSpeech().getReciprocity());
    System.out.printf("\tTense: %s\n", token.getPartOfSpeech().getTense());
    System.out.printf("\tVoice: %s\n", token.getPartOfSpeech().getVoice());
    System.out.println("DependencyEdge");
    System.out.printf("\tHeadTokenIndex: %d\n", token.getDependencyEdge().getHeadTokenIndex());
    System.out.printf("\tLabel: %s\n\n", token.getDependencyEdge().getLabel());
  }
  return response.getTokensList();
}
Node.js
To authenticate to Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
// Imports the Google Cloud client library
const language = require('@google-cloud/language');

// Creates a client
const client = new language.LanguageServiceClient();

/**
 * TODO(developer): Uncomment the following line to run this code.
 */
// const text = 'Your text to analyze, e.g. Hello, world!';

// Prepares a document, representing the provided text
const document = {
  content: text,
  type: 'PLAIN_TEXT',
};

// Need to specify an encodingType to receive word offsets
const encodingType = 'UTF8';

// Detects syntax in the document
const [syntax] = await client.analyzeSyntax({document, encodingType});

console.log('Tokens:');
syntax.tokens.forEach(part => {
  console.log(`${part.partOfSpeech.tag}: ${part.text.content}`);
  console.log('Morphology:', part.partOfSpeech);
});
PHP
To authenticate to Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
use Google\Cloud\Language\V1\Document;
use Google\Cloud\Language\V1\Document\Type;
use Google\Cloud\Language\V1\LanguageServiceClient;
use Google\Cloud\Language\V1\PartOfSpeech\Tag;

/**
 * @param string $text The text to analyze
 */
function analyze_syntax(string $text): void
{
    // Create the Natural Language client
    $languageServiceClient = new LanguageServiceClient();

    // Create a new Document, add text as content and set type to PLAIN_TEXT
    $document = (new Document())
        ->setContent($text)
        ->setType(Type::PLAIN_TEXT);

    // Call the analyzeSyntax function
    $response = $languageServiceClient->analyzeSyntax($document, []);
    $tokens = $response->getTokens();

    // Print out information about each token
    foreach ($tokens as $token) {
        printf('Token text: %s' . PHP_EOL, $token->getText()->getContent());
        printf('Token part of speech: %s' . PHP_EOL, Tag::name($token->getPartOfSpeech()->getTag()));
        print(PHP_EOL);
    }
}
Python
To authenticate to Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
from google.cloud import language_v1


def sample_analyze_syntax(text_content):
    """
    Analyzing Syntax in a String

    Args:
      text_content The text content to analyze
    """

    client = language_v1.LanguageServiceClient()

    # text_content = 'This is a short sentence.'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "en"
    document = {"content": text_content, "type_": type_, "language": language}

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_syntax(
        request={"document": document, "encoding_type": encoding_type}
    )
    # Loop through tokens returned from the API
    for token in response.tokens:
        # Get the text content of this token. Usually a word or punctuation.
        text = token.text
        print(f"Token text: {text.content}")
        print(
            f"Location of this token in overall document: {text.begin_offset}"
        )

        # Get the part of speech information for this token.
        # Part of speech is defined in:
        # http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
        part_of_speech = token.part_of_speech
        # Get the tag, e.g. NOUN, ADJ for Adjective, et al.
        print(
            "Part of Speech tag: {}".format(
                language_v1.PartOfSpeech.Tag(part_of_speech.tag).name
            )
        )
        # Get the voice, e.g. ACTIVE or PASSIVE
        print(
            "Voice: {}".format(
                language_v1.PartOfSpeech.Voice(part_of_speech.voice).name
            )
        )
        # Get the tense, e.g. PAST, FUTURE, PRESENT, et al.
        print(
            "Tense: {}".format(
                language_v1.PartOfSpeech.Tense(part_of_speech.tense).name
            )
        )
        # See API reference for additional Part of Speech information available
        # Get the lemma of the token. Wikipedia lemma description
        # https://en.wikipedia.org/wiki/Lemma_(morphology)
        print(f"Lemma: {token.lemma}")
        # Get the dependency tree parse information for this token.
        # For more information on dependency labels:
        # http://www.aclweb.org/anthology/P13-2017
        dependency_edge = token.dependency_edge
        print(f"Head token index: {dependency_edge.head_token_index}")
        print(
            "Label: {}".format(
                language_v1.DependencyEdge.Label(dependency_edge.label).name
            )
        )

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print(f"Language of the text: {response.language}")
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.