Mulai 29 April 2025, model Gemini 1.5 Pro dan Gemini 1.5 Flash tidak tersedia di project yang belum pernah menggunakan model tersebut, termasuk project baru. Untuk mengetahui detailnya, lihat Versi dan siklus hidup model.
Mengevaluasi model pembuatan teks menggunakan layanan evaluasi AI Generatif Vertex AI
Tetap teratur dengan koleksi
Simpan dan kategorikan konten berdasarkan preferensi Anda.
Gunakan layanan evaluasi AI generatif Vertex AI untuk mengevaluasi model generatif pada tugas Natural Language Processing (NLP) seperti peringkasan, penerjemahan, dan tanya jawab.
Mempelajari lebih lanjut
Untuk dokumentasi mendetail yang menyertakan contoh kode ini, lihat artikel berikut: API layanan evaluasi AI generatif (Gen AI evaluation service API).
Contoh kode
Kecuali dinyatakan lain, konten di halaman ini dilisensikan berdasarkan Lisensi Creative Commons Attribution 4.0, sedangkan contoh kode dilisensikan berdasarkan Lisensi Apache 2.0. Untuk mengetahui informasi selengkapnya, lihat Kebijakan Situs Google Developers. Java adalah merek dagang terdaftar dari Oracle dan/atau afiliasinya.
[[["Mudah dipahami","easyToUnderstand","thumb-up"],["Memecahkan masalah saya","solvedMyProblem","thumb-up"],["Lainnya","otherUp","thumb-up"]],[["Sulit dipahami","hardToUnderstand","thumb-down"],["Informasi atau kode contoh salah","incorrectInformationOrSampleCode","thumb-down"],["Informasi/contoh yang saya butuhkan tidak ada","missingTheInformationSamplesINeed","thumb-down"],["Masalah terjemahan","translationIssue","thumb-down"],["Lainnya","otherDown","thumb-down"]],[],[],[],null,["# Evaluate text generation models using Vertex AI Gen AI evaluation service\n\nUse the Vertex AI Gen AI evaluation service to evaluate generation models for Natural Language Processing (NLP) tasks like summarization, translation, and question answering.\n\nExplore further\n---------------\n\n\nFor detailed documentation that includes this code sample, see the following:\n\n- [Gen AI evaluation service API](/vertex-ai/generative-ai/docs/model-reference/evaluation)\n\nCode sample\n-----------\n\n### Go\n\n\nBefore trying this sample, follow the Go setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Go API\nreference documentation](/go/docs/reference/cloud.google.com/go/aiplatform/latest/apiv1).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import (\n \tcontext_pkg \"context\"\n \t\"fmt\"\n \t\"io\"\n\n \taiplatform \"cloud.google.com/go/aiplatform/apiv1beta1\"\n \taiplatformpb \"cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb\"\n \t\"google.golang.org/api/option\"\n )\n\n // evaluateModelResponse evaluates the output of an LLM for groundedness, i.e., how well\n // the model response connects with verifiable sources of information\n func evaluateModelResponse(w io.Writer, projectID, location string) 
error {\n \t// location = \"us-central1\"\n \tctx := context_pkg.Background()\n \tapiEndpoint := fmt.Sprintf(\"%s-aiplatform.googleapis.com:443\", location)\n \tclient, err := aiplatform.NewEvaluationClient(ctx, option.WithEndpoint(apiEndpoint))\n\n \tif err != nil {\n \t\treturn fmt.Errorf(\"unable to create aiplatform client: %w\", err)\n \t}\n \tdefer client.Close()\n\n \t// evaluate the pre-generated model response against the reference (ground truth)\n \tresponseToEvaluate := `\n The city is undertaking a major project to revamp its public transportation system.\n This initiative is designed to improve efficiency, reduce carbon emissions, and promote\n eco-friendly commuting. The city expects that this investment will enhance accessibility\n and usher in a new era of sustainable urban transportation.\n `\n \treference := `\n As part of a comprehensive initiative to tackle urban congestion and foster\n sustainable urban living, a major city has revealed ambitious plans for an\n extensive overhaul of its public transportation system. 
The project aims not\n only to improve the efficiency and reliability of public transit but also to\n reduce the city\\'s carbon footprint and promote eco-friendly commuting options.\n City officials anticipate that this strategic investment will enhance\n accessibility for residents and visitors alike, ushering in a new era of\n efficient, environmentally conscious urban transportation.\n `\n \treq := aiplatformpb.EvaluateInstancesRequest{\n \t\tLocation: fmt.Sprintf(\"projects/%s/locations/%s\", projectID, location),\n \t\t// Check the API reference for a full list of supported metric inputs:\n \t\t// https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1beta1#evaluateinstancesrequest\n \t\tMetricInputs: &aiplatformpb.EvaluateInstancesRequest_GroundednessInput{\n \t\t\tGroundednessInput: &aiplatformpb.GroundednessInput{\n \t\t\t\tMetricSpec: &aiplatformpb.GroundednessSpec{},\n \t\t\t\tInstance: &aiplatformpb.GroundednessInstance{\n \t\t\t\t\tContext: &reference,\n \t\t\t\t\tPrediction: &responseToEvaluate,\n \t\t\t\t},\n \t\t\t},\n \t\t},\n \t}\n\n \tresp, err := client.EvaluateInstances(ctx, &req)\n \tif err != nil {\n \t\treturn fmt.Errorf(\"evaluateInstances failed: %v\", err)\n \t}\n\n \tresults := resp.GetGroundednessResult()\n \tfmt.Fprintf(w, \"score: %.2f\\n\", results.GetScore())\n \tfmt.Fprintf(w, \"confidence: %.2f\\n\", results.GetConfidence())\n \tfmt.Fprintf(w, \"explanation:\\n%s\\n\", results.GetExplanation())\n \t// Example response:\n \t// score: 1.00\n \t// confidence: 1.00\n \t// explanation:\n \t// STEP 1: All aspects of the response are found in the context.\n \t// The response accurately summarizes the city's plan to overhaul its public transportation system, highlighting the goals of ...\n \t// STEP 2: According to the rubric, the response is scored 1 because all aspects of the response are attributable to the context.\n\n \treturn nil\n }\n\n### Python\n\n\nBefore trying this sample, follow the Python setup 
instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Python API\nreference documentation](/python/docs/reference/aiplatform/latest).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import pandas as pd\n\n import vertexai\n from vertexai.preview.evaluation import EvalTask, MetricPromptTemplateExamples\n\n # TODO(developer): Update and un-comment below line\n # PROJECT_ID = \"your-project-id\"\n vertexai.init(project=PROJECT_ID, location=\"us-central1\")\n\n eval_dataset = pd.DataFrame(\n {\n \"instruction\": [\n \"Summarize the text in one sentence.\",\n \"Summarize the text such that a five-year-old can understand.\",\n ],\n \"context\": [\n \"\"\"As part of a comprehensive initiative to tackle urban congestion and foster\n sustainable urban living, a major city has revealed ambitious plans for an\n extensive overhaul of its public transportation system. The project aims not\n only to improve the efficiency and reliability of public transit but also to\n reduce the city\\'s carbon footprint and promote eco-friendly commuting options.\n City officials anticipate that this strategic investment will enhance\n accessibility for residents and visitors alike, ushering in a new era of\n efficient, environmentally conscious urban transportation.\"\"\",\n \"\"\"A team of archaeologists has unearthed ancient artifacts shedding light on a\n previously unknown civilization. 
The findings challenge existing historical\n narratives and provide valuable insights into human history.\"\"\",\n ],\n \"response\": [\n \"A major city is revamping its public transportation system to fight congestion, reduce emissions, and make getting around greener and easier.\",\n \"Some people who dig for old things found some very special tools and objects that tell us about people who lived a long, long time ago! What they found is like a new puzzle piece that helps us understand how people used to live.\",\n ],\n }\n )\n\n eval_task = EvalTask(\n dataset=eval_dataset,\n metrics=[\n MetricPromptTemplateExamples.Pointwise.SUMMARIZATION_QUALITY,\n MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS,\n MetricPromptTemplateExamples.Pointwise.VERBOSITY,\n MetricPromptTemplateExamples.Pointwise.INSTRUCTION_FOLLOWING,\n ],\n )\n\n prompt_template = (\n \"Instruction: {instruction}. Article: {context}. Summary: {response}\"\n )\n result = eval_task.evaluate(prompt_template=prompt_template)\n\n print(\"Summary Metrics:\\n\")\n\n for key, value in result.summary_metrics.items():\n print(f\"{key}: \\t{value}\")\n\n print(\"\\n\\nMetrics Table:\\n\")\n print(result.metrics_table)\n # Example response:\n # Summary Metrics:\n # row_count: 2\n # summarization_quality/mean: 3.5\n # summarization_quality/std: 2.1213203435596424\n # ...\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=generativeaionvertexai)."]]