Prozessorversion trainieren

Neuen Trainingsjob für einen Prozessor starten

Weitere Informationen

Eine ausführliche Dokumentation, die dieses Codebeispiel enthält, finden Sie hier:

Codebeispiel

Python

Weitere Informationen finden Sie in der Referenzdokumentation zur Document AI Python API.

Richten Sie zur Authentifizierung bei Document AI Standardanmeldedaten für Anwendungen ein. Weitere Informationen finden Sie unter Authentifizierung für eine lokale Entwicklungsumgebung einrichten.


from typing import Optional

from google.api_core.client_options import ClientOptions
from google.cloud import documentai  # type: ignore

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID'
# processor_version_display_name = 'new-processor-version'
# train_data_uri = 'gs://bucket/directory/' # (Optional)
# test_data_uri = 'gs://bucket/directory/' # (Optional)


def train_processor_version_sample(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version_display_name: str,
    train_data_uri: Optional[str] = None,
    test_data_uri: Optional[str] = None,
) -> None:
    # You must set the api_endpoint if you use a location other than 'us', e.g.:
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")

    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    # The full resource name of the processor
    # e.g. `projects/{project_id}/locations/{location}/processors/{processor_id}
    parent = client.processor_path(project_id, location, processor_id)

    processor_version = documentai.ProcessorVersion(
        display_name=processor_version_display_name
    )

    # If train/test data is not supplied, the default sets in the Cloud Console will be used
    input_data = documentai.TrainProcessorVersionRequest.InputData(
        training_documents=documentai.BatchDocumentsInputConfig(
            gcs_prefix=documentai.GcsPrefix(gcs_uri_prefix=train_data_uri)
        ),
        test_documents=documentai.BatchDocumentsInputConfig(
            gcs_prefix=documentai.GcsPrefix(gcs_uri_prefix=test_data_uri)
        ),
    )

    request = documentai.TrainProcessorVersionRequest(
        parent=parent, processor_version=processor_version, input_data=input_data
    )

    operation = client.train_processor_version(request=request)
    # Print operation details
    print(operation.operation.name)
    # Wait for operation to complete
    response = documentai.TrainProcessorVersionResponse(operation.result())

    metadata = documentai.TrainProcessorVersionMetadata(operation.metadata)

    print(f"New Processor Version:{response.processor_version}")
    print(f"Training Set Validation: {metadata.training_dataset_validation}")
    print(f"Test Set Validation: {metadata.test_dataset_validation}")

Nächste Schritte

Informationen zum Suchen und Filtern von Codebeispielen für andere Google Cloud -Produkte finden Sie im Google Cloud Beispielbrowser.