Valutare una risposta del modello rispetto a un riferimento (dati empirici reali) utilizzando la metrica ROUGE

Questo esempio di codice mostra come utilizzare Vertex AI per calcolare le metriche ROUGE per la valutazione dei modelli di riassunto di testi. Mostra come definire un'attività di valutazione e calcolare i punteggi ROUGE per più riepiloghi generati rispetto a un riepilogo di riferimento.

Per saperne di più

Per la documentazione dettagliata che include questo esempio di codice, consulta quanto segue:

Esempio di codice

Go

Prima di provare questo esempio, segui le istruzioni di configurazione Go riportate nella guida rapida all'utilizzo delle librerie client di Vertex AI. Per ulteriori informazioni, consulta la documentazione di riferimento dell'API Go di Vertex AI.

Per autenticarti a Vertex AI, configura le Credenziali predefinite dell'applicazione. Per ulteriori informazioni, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.

import (
	"context"
	"fmt"
	"io"

	aiplatform "cloud.google.com/go/aiplatform/apiv1beta1"
	aiplatformpb "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb"
	"google.golang.org/api/option"
)

// getROUGEScore evaluates a model response against a reference (ground truth) using the ROUGE metric
func getROUGEScore(w io.Writer, projectID, location string) error {
	// location = "us-central1"
	ctx := context.Background()
	apiEndpoint := fmt.Sprintf("%s-aiplatform.googleapis.com:443", location)
	client, err := aiplatform.NewEvaluationClient(ctx, option.WithEndpoint(apiEndpoint))

	if err != nil {
		return fmt.Errorf("unable to create aiplatform client: %w", err)
	}
	defer client.Close()

	modelResponse := `
The Great Barrier Reef, the world's largest coral reef system located in Australia,
is a vast and diverse ecosystem. However, it faces serious threats from climate change,
ocean acidification, and coral bleaching, endangering its rich marine life.
`
	reference := `
The Great Barrier Reef, the world's largest coral reef system, is
located off the coast of Queensland, Australia. It's a vast
ecosystem spanning over 2,300 kilometers with thousands of reefs
and islands. While it harbors an incredible diversity of marine
life, including endangered species, it faces serious threats from
climate change, ocean acidification, and coral bleaching.
`
	req := aiplatformpb.EvaluateInstancesRequest{
		Location: fmt.Sprintf("projects/%s/locations/%s", projectID, location),
		MetricInputs: &aiplatformpb.EvaluateInstancesRequest_RougeInput{
			RougeInput: &aiplatformpb.RougeInput{
				// Check the API reference for the list of supported ROUGE metric types:
				// https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1beta1#rougespec
				MetricSpec: &aiplatformpb.RougeSpec{
					RougeType: "rouge1",
				},
				Instances: []*aiplatformpb.RougeInstance{
					{
						Prediction: &modelResponse,
						Reference:  &reference,
					},
				},
			},
		},
	}

	resp, err := client.EvaluateInstances(ctx, &req)
	if err != nil {
		return fmt.Errorf("evaluateInstances failed: %v", err)
	}

	fmt.Fprintln(w, "evaluation results:")
	fmt.Fprintln(w, resp.GetRougeResults().GetRougeMetricValues())
	// Example response:
	// [score:0.6597938]

	return nil
}

Python

Prima di provare questo esempio, segui le istruzioni di configurazione Python riportate nella guida rapida all'utilizzo delle librerie client di Vertex AI. Per ulteriori informazioni, consulta la documentazione di riferimento dell'API Python di Vertex AI.

Per autenticarti a Vertex AI, configura le Credenziali predefinite dell'applicazione. Per ulteriori informazioni, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.

import pandas as pd

import vertexai
from vertexai.preview.evaluation import EvalTask

# TODO(developer): Update & uncomment line below
# PROJECT_ID = "your-project-id"
vertexai.init(project=PROJECT_ID, location="us-central1")

reference_summarization = """
The Great Barrier Reef, the world's largest coral reef system, is
located off the coast of Queensland, Australia. It's a vast
ecosystem spanning over 2,300 kilometers with thousands of reefs
and islands. While it harbors an incredible diversity of marine
life, including endangered species, it faces serious threats from
climate change, ocean acidification, and coral bleaching."""

# Compare pre-generated model responses against the reference (ground truth).
eval_dataset = pd.DataFrame(
    {
        "response": [
            """The Great Barrier Reef, the world's largest coral reef system located
        in Australia, is a vast and diverse ecosystem. However, it faces serious
        threats from climate change, ocean acidification, and coral bleaching,
        endangering its rich marine life.""",
            """The Great Barrier Reef, a vast coral reef system off the coast of
        Queensland, Australia, is the world's largest. It's a complex ecosystem
        supporting diverse marine life, including endangered species. However,
        climate change, ocean acidification, and coral bleaching are serious
        threats to its survival.""",
            """The Great Barrier Reef, the world's largest coral reef system off the
        coast of Australia, is a vast and diverse ecosystem with thousands of
        reefs and islands. It is home to a multitude of marine life, including
        endangered species, but faces serious threats from climate change, ocean
        acidification, and coral bleaching.""",
        ],
        "reference": [reference_summarization] * 3,
    }
)
eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[
        "rouge_1",
        "rouge_2",
        "rouge_l",
        "rouge_l_sum",
    ],
)
result = eval_task.evaluate()

print("Summary Metrics:\n")
for key, value in result.summary_metrics.items():
    print(f"{key}: \t{value}")

print("\n\nMetrics Table:\n")
print(result.metrics_table)
# Example response:
#
# Summary Metrics:
#
# row_count:      3
# rouge_1/mean:   0.7191161666666667
# rouge_1/std:    0.06765143922270488
# rouge_2/mean:   0.5441118566666666
# ...
# Metrics Table:
#
#                                        response                         reference  ...  rouge_l/score  rouge_l_sum/score
# 0  The Great Barrier Reef, the world's ...  \n    The Great Barrier Reef, the ...  ...       0.577320           0.639175
# 1  The Great Barrier Reef, a vast coral...  \n    The Great Barrier Reef, the ...  ...       0.552381           0.666667
# 2  The Great Barrier Reef, the world's ...  \n    The Great Barrier Reef, the ...  ...       0.774775           0.774775

Passaggi successivi

Per cercare e filtrare i sample di codice per altri prodotti Google Cloud , consulta il Google Cloud browser di sample.