Mulai 29 April 2025, model Gemini 1.5 Pro dan Gemini 1.5 Flash tidak tersedia di project yang belum pernah menggunakan model ini, termasuk project baru. Untuk mengetahui detailnya, lihat Versi dan siklus hidup model.
Evaluasi Kualitas Peringkasan Berpasangan
Tetap teratur dengan koleksi
Simpan dan kategorikan konten berdasarkan preferensi Anda.
Contoh ini menunjukkan cara mengevaluasi kualitas ringkasan dua model AI Generatif menggunakan perbandingan berpasangan. Evaluasi menggunakan metrik yang menilai seberapa baik setiap model meringkas teks tertentu.
Mempelajari lebih lanjut
Untuk dokumentasi mendetail yang menyertakan contoh kode ini, lihat artikel berikut:
- Gen AI evaluation service API
Contoh kode
Kecuali dinyatakan lain, konten di halaman ini dilisensikan berdasarkan Lisensi Creative Commons Attribution 4.0, sedangkan contoh kode dilisensikan berdasarkan Lisensi Apache 2.0. Untuk mengetahui informasi selengkapnya, lihat Kebijakan Situs Google Developers. Java adalah merek dagang terdaftar dari Oracle dan/atau afiliasinya.
[[["Mudah dipahami","easyToUnderstand","thumb-up"],["Memecahkan masalah saya","solvedMyProblem","thumb-up"],["Lainnya","otherUp","thumb-up"]],[["Sulit dipahami","hardToUnderstand","thumb-down"],["Informasi atau kode contoh salah","incorrectInformationOrSampleCode","thumb-down"],["Informasi/contoh yang saya butuhkan tidak ada","missingTheInformationSamplesINeed","thumb-down"],["Masalah terjemahan","translationIssue","thumb-down"],["Lainnya","otherDown","thumb-down"]],[],[],[],null,["# Pairwise Summarization Quality Evaluation\n\nThis sample demonstrates how to evaluate two Generative AI models for their summarization quality using pairwise comparison. The evaluation uses a metric that assesses how well each model summarizes a given text.\n\nExplore further\n---------------\n\n\nFor detailed documentation that includes this code sample, see the following:\n\n- [Gen AI evaluation service API](/vertex-ai/generative-ai/docs/model-reference/evaluation)\n\nCode sample\n-----------\n\n### Go\n\n\nBefore trying this sample, follow the Go setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Go API\nreference documentation](/go/docs/reference/cloud.google.com/go/aiplatform/latest/apiv1).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import (\n \tcontext_pkg \"context\"\n \t\"fmt\"\n \t\"io\"\n\n \taiplatform \"cloud.google.com/go/aiplatform/apiv1beta1\"\n \taiplatformpb \"cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb\"\n \t\"google.golang.org/api/option\"\n )\n\n // pairwiseEvaluation lets the judge model to compare the responses of two models and pick the better one\n func pairwiseEvaluation(w io.Writer, projectID, location string) error {\n \t// location = \"us-central1\"\n \tctx := 
context_pkg.Background()\n \tapiEndpoint := fmt.Sprintf(\"%s-aiplatform.googleapis.com:443\", location)\n \tclient, err := aiplatform.NewEvaluationClient(ctx, option.WithEndpoint(apiEndpoint))\n\n \tif err != nil {\n \t\treturn fmt.Errorf(\"unable to create aiplatform client: %w\", err)\n \t}\n \tdefer client.Close()\n\n \tcontext := `\n As part of a comprehensive initiative to tackle urban congestion and foster\n sustainable urban living, a major city has revealed ambitious plans for an\n extensive overhaul of its public transportation system. The project aims not\n only to improve the efficiency and reliability of public transit but also to\n reduce the city\\'s carbon footprint and promote eco-friendly commuting options.\n City officials anticipate that this strategic investment will enhance\n accessibility for residents and visitors alike, ushering in a new era of\n efficient, environmentally conscious urban transportation.\n `\n \tinstruction := \"Summarize the text such that a five-year-old can understand.\"\n \tbaselineResponse := `\n The city wants to make it easier for people to get around without using cars.\n They're going to make the buses and trains better and faster, so people will want to\n use them more. This will help the air be cleaner and make the city a better place to live.\n `\n \tcandidateResponse := `\n The city is making big changes to how people get around. They want to make the buses and\n trains work better and be easier for everyone to use. This will also help the environment\n by getting people to use less gas. 
The city thinks these changes will make it easier for\n everyone to get where they need to go.\n `\n\n \treq := aiplatformpb.EvaluateInstancesRequest{\n \t\tLocation: fmt.Sprintf(\"projects/%s/locations/%s\", projectID, location),\n \t\tMetricInputs: &aiplatformpb.EvaluateInstancesRequest_PairwiseSummarizationQualityInput{\n \t\t\tPairwiseSummarizationQualityInput: &aiplatformpb.PairwiseSummarizationQualityInput{\n \t\t\t\tMetricSpec: &aiplatformpb.PairwiseSummarizationQualitySpec{},\n \t\t\t\tInstance: &aiplatformpb.PairwiseSummarizationQualityInstance{\n \t\t\t\t\tContext: &context,\n \t\t\t\t\tInstruction: &instruction,\n \t\t\t\t\tPrediction: &candidateResponse,\n \t\t\t\t\tBaselinePrediction: &baselineResponse,\n \t\t\t\t},\n \t\t\t},\n \t\t},\n \t}\n\n \tresp, err := client.EvaluateInstances(ctx, &req)\n \tif err != nil {\n \t\treturn fmt.Errorf(\"evaluateInstances failed: %v\", err)\n \t}\n\n \tresults := resp.GetPairwiseSummarizationQualityResult()\n \tfmt.Fprintf(w, \"choice: %s\\n\", results.GetPairwiseChoice())\n \tfmt.Fprintf(w, \"confidence: %.2f\\n\", results.GetConfidence())\n \tfmt.Fprintf(w, \"explanation:\\n%s\\n\", results.GetExplanation())\n \t// Example response:\n \t// choice: BASELINE\n \t// confidence: 0.50\n \t// explanation:\n \t// BASELINE response is easier to understand. For example, the phrase \"...\" is easier to understand than \"...\". 
Thus, BASELINE response is ...\n\n \treturn nil\n }\n\n### Python\n\n\nBefore trying this sample, follow the Python setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Python API\nreference documentation](/python/docs/reference/aiplatform/latest).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import pandas as pd\n\n import vertexai\n from vertexai.generative_models import GenerativeModel\n from vertexai.evaluation import (\n EvalTask,\n PairwiseMetric,\n MetricPromptTemplateExamples,\n )\n\n # TODO(developer): Update & uncomment line below\n # PROJECT_ID = \"your-project-id\"\n vertexai.init(project=PROJECT_ID, location=\"us-central1\")\n\n prompt = \"\"\"\n Summarize the text such that a five-year-old can understand.\n\n # Text\n\n As part of a comprehensive initiative to tackle urban congestion and foster\n sustainable urban living, a major city has revealed ambitious plans for an\n extensive overhaul of its public transportation system. 
The project aims not\n only to improve the efficiency and reliability of public transit but also to\n reduce the city\\'s carbon footprint and promote eco-friendly commuting options.\n City officials anticipate that this strategic investment will enhance\n accessibility for residents and visitors alike, ushering in a new era of\n efficient, environmentally conscious urban transportation.\n \"\"\"\n\n eval_dataset = pd.DataFrame({\"prompt\": [prompt]})\n\n # Baseline model for pairwise comparison\n baseline_model = GenerativeModel(\"gemini-2.0-flash-lite-001\")\n\n # Candidate model for pairwise comparison\n candidate_model = GenerativeModel(\n \"gemini-2.0-flash-001\", generation_config={\"temperature\": 0.4}\n )\n\n prompt_template = MetricPromptTemplateExamples.get_prompt_template(\n \"pairwise_summarization_quality\"\n )\n\n summarization_quality_metric = PairwiseMetric(\n metric=\"pairwise_summarization_quality\",\n metric_prompt_template=prompt_template,\n baseline_model=baseline_model,\n )\n\n eval_task = EvalTask(\n dataset=eval_dataset,\n metrics=[summarization_quality_metric],\n experiment=\"pairwise-experiment\",\n )\n result = eval_task.evaluate(model=candidate_model)\n\n baseline_model_response = result.metrics_table[\"baseline_model_response\"].iloc[0]\n candidate_model_response = result.metrics_table[\"response\"].iloc[0]\n winner_model = result.metrics_table[\n \"pairwise_summarization_quality/pairwise_choice\"\n ].iloc[0]\n explanation = result.metrics_table[\n \"pairwise_summarization_quality/explanation\"\n ].iloc[0]\n\n print(f\"Baseline's story:\\n{baseline_model_response}\")\n print(f\"Candidate's story:\\n{candidate_model_response}\")\n print(f\"Winner: {winner_model}\")\n print(f\"Explanation: {explanation}\")\n # Example response:\n # Baseline's story:\n # A big city wants to make it easier for people to get around without using cars! 
They're going to make buses and trains ...\n #\n # Candidate's story:\n # A big city wants to make it easier for people to get around without using cars! ... This will help keep the air clean ...\n #\n # Winner: CANDIDATE\n # Explanation: Both responses adhere to the prompt's constraints, are grounded in the provided text, and ... However, Response B ...\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=generativeaionvertexai)."]]