Download a file in chunks concurrently

Use Transfer Manager to download a single large file in chunks concurrently.

Code sample

Go

For more information, see the Cloud Storage Go API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
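
The official sample below assumes Application Default Credentials are already configured. As a minimal sketch (not part of the sample), the client can also be pointed at a service-account key file explicitly; the key-file path shown here is hypothetical.

package main

import (
	"context"
	"log"

	"cloud.google.com/go/storage"
	"google.golang.org/api/option"
)

func main() {
	ctx := context.Background()

	// With Application Default Credentials configured (for example via
	// `gcloud auth application-default login`), no extra options are needed:
	//   client, err := storage.NewClient(ctx)

	// Otherwise, a service-account key file can be supplied explicitly
	// (hypothetical path).
	client, err := storage.NewClient(ctx, option.WithCredentialsFile("/path/to/key.json"))
	if err != nil {
		log.Fatalf("storage.NewClient: %v", err)
	}
	defer client.Close()
}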

package transfermanager

import (
	"context"
	"fmt"
	"io"
	"os"

	"cloud.google.com/go/storage"
	"cloud.google.com/go/storage/transfermanager"
)

// downloadChunksConcurrently downloads a single file in chunks, concurrently, using a pool of workers.
func downloadChunksConcurrently(w io.Writer, bucketName, blobName, filename string) error {
	// bucketName := "your-bucket-name"
	// blobName := "target-file"
	// filename := "path/to/your/local/file.txt"

	// The chunkSize is the size of each chunk to be downloaded.
	// The performance impact of this value depends on the use case.
	// For example, for a slow network, using a smaller chunkSize may be better.
	// Providing this parameter is optional and the default value is 32 MiB.
	chunkSize := 16 * 1024 * 1024 // 16 MiB

	// The maximum number of workers to use for the operation.
	// Providing this parameter is optional.
	// The performance impact of this value depends on the use case.
	// For downloading one large file, the default value of NumCPU / 2 is usually sufficient.
	workers := 8

	ctx := context.Background()
	client, err := storage.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("storage.NewClient: %w", err)
	}
	defer client.Close()

	d, err := transfermanager.NewDownloader(client, transfermanager.WithPartSize(int64(chunkSize)), transfermanager.WithWorkers(workers))
	if err != nil {
		return fmt.Errorf("transfermanager.NewDownloader: %w", err)
	}

	f, err := os.Create(filename)
	if err != nil {
		return fmt.Errorf("os.Create: %w", err)
	}
	defer f.Close()

	in := &transfermanager.DownloadObjectInput{
		Bucket:      bucketName,
		Object:      blobName,
		Destination: f,
	}

	if err := d.DownloadObject(ctx, in); err != nil {
		return fmt.Errorf("d.DownloadObject: %w", err)
	}

	// Wait for all downloads to complete and close the downloader.
	// This synchronizes the download processes.
	results, err := d.WaitAndClose()
	if err != nil {
		return fmt.Errorf("d.WaitAndClose: %w", err)
	}

	// Process the downloader result.
	if len(results) != 1 {
		return fmt.Errorf("expected 1 result, got %d", len(results))
	}
	result := results[0]
	if result.Err != nil {
		fmt.Fprintf(w, "download of %v failed with error %v\n", result.Object, result.Err)
		return result.Err
	}
	fmt.Fprintf(w, "Downloaded %v to %v.\n", blobName, filename)

	return nil
}
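
A minimal usage sketch, not part of the official sample: it assumes the function above is placed in (or exported from) your own main package, and the bucket, object, and destination path shown here are hypothetical. It also needs the "log" package in addition to the imports above.

func main() {
	bucketName := "my-bucket"           // hypothetical bucket
	blobName := "large-object.bin"      // hypothetical object
	filename := "/tmp/large-object.bin" // hypothetical local destination

	if err := downloadChunksConcurrently(os.Stdout, bucketName, blobName, filename); err != nil {
		log.Fatalf("downloadChunksConcurrently: %v", err)
	}
}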

Java

For more information, see the Cloud Storage Java API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.transfermanager.DownloadResult;
import com.google.cloud.storage.transfermanager.ParallelDownloadConfig;
import com.google.cloud.storage.transfermanager.TransferManager;
import com.google.cloud.storage.transfermanager.TransferManagerConfig;
import java.nio.file.Path;
import java.util.List;

class AllowDivideAndConquerDownload {

  public static void divideAndConquerDownloadAllowed(
      List<BlobInfo> blobs, String bucketName, Path destinationDirectory) {
    // Enabling divide-and-conquer download lets Transfer Manager fetch each
    // object in chunks concurrently.
    TransferManager transferManager =
        TransferManagerConfig.newBuilder()
            .setAllowDivideAndConquerDownload(true)
            .build()
            .getService();
    // Configure which bucket to read from and where to write the downloads.
    ParallelDownloadConfig parallelDownloadConfig =
        ParallelDownloadConfig.newBuilder()
            .setBucketName(bucketName)
            .setDownloadDirectory(destinationDirectory)
            .build();
    // Download the blobs and block until the results are available.
    List<DownloadResult> results =
        transferManager.downloadBlobs(blobs, parallelDownloadConfig).getDownloadResults();

    for (DownloadResult result : results) {
      System.out.println(
          "Download of "
              + result.getInput().getName()
              + " completed with status "
              + result.getStatus());
    }
  }
}

Node.js

For more information, see the Cloud Storage Node.js API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// The ID of your GCS bucket
// const bucketName = 'your-unique-bucket-name';

// The ID of the GCS file to download
// const fileName = 'your-file-name';

// The path to which the file should be downloaded
// const destFileName = '/local/path/to/file.txt';

// The size of each chunk to be downloaded
// const chunkSize = 1024;

// Imports the Google Cloud client library
const {Storage, TransferManager} = require('@google-cloud/storage');

// Creates a client
const storage = new Storage();

// Creates a transfer manager client
const transferManager = new TransferManager(storage.bucket(bucketName));

async function downloadFileInChunksWithTransferManager() {
  // Downloads the file in chunks
  await transferManager.downloadFileInChunks(fileName, {
    destination: destFileName,
    chunkSizeBytes: chunkSize,
  });

  console.log(
    `gs://${bucketName}/${fileName} downloaded to ${destFileName}.`
  );
}

downloadFileInChunksWithTransferManager().catch(console.error);

Python

For more information, see the Cloud Storage Python API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

def download_chunks_concurrently(
    bucket_name, blob_name, filename, chunk_size=32 * 1024 * 1024, workers=8
):
    """Download a single file in chunks, concurrently in a process pool."""

    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The file to be downloaded
    # blob_name = "target-file"

    # The destination filename or path
    # filename = ""

    # The size of each chunk. The performance impact of this value depends on
    # the use case. The remote service has a minimum of 5 MiB and a maximum of
    # 5 GiB.
    # chunk_size = 32 * 1024 * 1024 (32 MiB)

    # The maximum number of processes to use for the operation. The performance
    # impact of this value depends on the use case, but smaller files usually
    # benefit from a higher number of processes. Each additional process occupies
    # some CPU and memory resources until finished. Threads can be used instead
    # of processes by passing `worker_type=transfer_manager.THREAD`.
    # workers=8

    from google.cloud.storage import Client, transfer_manager

    storage_client = Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    transfer_manager.download_chunks_concurrently(
        blob, filename, chunk_size=chunk_size, max_workers=workers
    )

    print("Downloaded {} to {}.".format(blob_name, filename))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.