
本指南介绍了设计、实现、测试和部署 Knative serving 服务的最佳实践。如需了解更多提示,请参阅迁移现有服务。


本部分介绍设计和实现 Knative serving 服务的一般最佳实践。


当 Knative serving 上运行的应用处理完请求后,容器实例对 CPU 的访问将被停用或受到严重限制。因此,您不应启动在请求处理程序范围之外运行的后台线程或例程。


后台活动是指在 HTTP 响应送达后发生的任何活动。建议检查您的代码,以确保所有异步操作都会在传送响应之前完成。

如果您怀疑服务中可能存在并不明显的后台活动,可以检查日志,以查找在 HTTP 请求条目后记录的任何内容。


在 Cloud Run 环境中,磁盘存储空间属于内存文件系统。 写入磁盘的文件会占用供服务使用的内存,并且可在多次调用之间继续留存。 如果不删除这些文件,最终可能会导致内存不足错误,并且随后需要进行冷启动。






  • 启动服务
    • 启动容器
    • 运行 entrypoint 命令以启动服务器。
  • 检查开放的服务端口。



如果您使用具有依赖项库的动态语言,例如导入 Node.js 模块,那么在冷启动期间加载这些模块会增加延迟时间。您可以通过以下方式缩短启动延迟时间:

  • 最大限度地减少依赖项的数量和大小,以构建精简服务。
  • 惰性加载不常用的代码(如果您所用的语言支持此方式)。
  • 使用代码加载优化技术,例如 PHP 的 Composer 自动加载器优化技术。


在 Knative serving 中,您不能假设服务状态会在各请求之间保持不变。但是,Knative serving 确实会重复使用独立的容器实例来处理持续流量,因此您可以在全局范围内声明一个变量,以允许后续调用重复使用其值。无法预知重复使用此变量是否会让任何单独的请求受益。


const functions = require('@google-cloud/functions-framework');

// TODO(developer): Define your own computations
const {lightComputation, heavyComputation} = require('./computations');

// Global (instance-wide) scope
// This computation runs once (at instance cold-start)
const instanceVar = heavyComputation();

/**
 * HTTP function that declares a variable.
 *
 * Responds with both the instance-wide value (computed once, when the module
 * is loaded) and a per-request value (recomputed on every invocation).
 *
 * @param {Object} req request context.
 * @param {Object} res response context.
 */
functions.http('scopeDemo', (req, res) => {
  // Per-function scope
  // This computation runs every time this function is called
  const functionVar = lightComputation();

  res.send(`Per instance: ${instanceVar}, per function: ${functionVar}`);
});
import time

import functions_framework

def heavy_computation():
    """Placeholder for an expensive computation.

    Returns:
        The current time as reported by ``time.time()``.
    """
    now = time.time()
    return now

def light_computation():
    """Placeholder for a cheap, per-request computation.

    Returns:
        The current time as reported by ``time.time()``.
    """
    now = time.time()
    return now

# Global (instance-wide) scope
# This computation runs at instance cold-start
instance_var = heavy_computation()

@functions_framework.http
def scope_demo(request):
    """
    HTTP Cloud Function that declares a variable.

    Args:
        request (flask.Request): The request object.

    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
    """

    # Per-function scope
    # This computation runs every time this function is called
    function_var = light_computation()
    return f"Instance: {instance_var}; function: {function_var}"

// h is in the global (instance-wide) scope.
var h string

// init runs during package initialization. So, this will only run during an
// an instance's cold start.
func init() {
	h = heavyComputation()
	functions.HTTP("ScopeDemo", ScopeDemo)

// ScopeDemo is an example of using globally and locally
// scoped variables in a function.
func ScopeDemo(w http.ResponseWriter, r *http.Request) {
	l := lightComputation()
	fmt.Fprintf(w, "Global: %q, Local: %q", h, l)

import com.google.cloud.functions.HttpFunction;
import com.google.cloud.functions.HttpRequest;
import com.google.cloud.functions.HttpResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;

public class Scopes implements HttpFunction {
  // Global (instance-wide) scope
  // This computation runs at instance cold-start.
  // Warning: Class variables used in functions code must be thread-safe.
  private static final int INSTANCE_VAR = heavyComputation();

  public void service(HttpRequest request, HttpResponse response)
      throws IOException {
    // Per-function scope
    // This computation runs every time this function is called
    int functionVar = lightComputation();

    var writer = new PrintWriter(response.getWriter());
    writer.printf("Instance: %s; function: %s", INSTANCE_VAR, functionVar);

  private static int lightComputation() {
    int[] numbers = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    return Arrays.stream(numbers).sum();

  private static int heavyComputation() {
    int[] numbers = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    return Arrays.stream(numbers).reduce((t, x) -> t * x).getAsInt();



const functions = require('@google-cloud/functions-framework');

// Always initialized (at cold-start)
const nonLazyGlobal = fileWideComputation();

// Declared at cold-start, but only initialized if/when the function executes
let lazyGlobal;

/**
 * HTTP function that uses lazy-initialized globals
 *
 * @param {Object} req request context.
 * @param {Object} res response context.
 */
functions.http('lazyGlobals', (req, res) => {
  // This value is initialized only if (and when) the function is called.
  // Compare against `undefined` instead of relying on truthiness, so that a
  // legitimately falsy result (0, '') is not recomputed on every request.
  if (lazyGlobal === undefined) {
    lazyGlobal = functionSpecificComputation();
  }

  res.send(`Lazy global: ${lazyGlobal}, non-lazy global: ${nonLazyGlobal}`);
});
import functions_framework

# Always initialized (at cold-start)
non_lazy_global = file_wide_computation()

# Declared at cold-start, but only initialized if/when the function executes
lazy_global = None

@functions_framework.http
def lazy_globals(request):
    """
    HTTP Cloud Function that uses lazily-initialized globals.

    Args:
        request (flask.Request): The request object.

    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
    """
    # Only lazy_global is rebound here; non_lazy_global is merely read.
    global lazy_global

    # This value is initialized only if (and when) the function is called.
    # Test against None rather than truthiness so that a falsy result
    # (0, "") is not recomputed on every request.
    if lazy_global is None:
        lazy_global = function_specific_computation()

    return f"Lazy: {lazy_global}, non-lazy: {non_lazy_global}."

// Package tips contains tips for writing Cloud Functions in Go.
package tips

import (
	"context"
	"log"
	"net/http"
	"sync"

	"cloud.google.com/go/storage"
	"github.com/GoogleCloudPlatform/functions-framework-go/functions"
)

// client is lazily initialized by LazyGlobal.
var client *storage.Client
var clientOnce sync.Once

func init() {
	functions.HTTP("LazyGlobal", LazyGlobal)

// LazyGlobal is an example of lazily initializing a Google Cloud Storage client.
func LazyGlobal(w http.ResponseWriter, r *http.Request) {
	// You may wish to add different checks to see if the client is needed for
	// this request.
	clientOnce.Do(func() {
		// Pre-declare an err variable to avoid shadowing client.
		var err error
		client, err = storage.NewClient(context.Background())
		if err != nil {
			http.Error(w, "Internal error", http.StatusInternalServerError)
			log.Printf("storage.NewClient: %v", err)
	// Use client.

import com.google.cloud.functions.HttpFunction;
import com.google.cloud.functions.HttpRequest;
import com.google.cloud.functions.HttpResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;

public class LazyFields implements HttpFunction {
  // Always initialized (at cold-start)
  // Warning: Class variables used in Servlet classes must be thread-safe,
  // or else might introduce race conditions in your code.
  private static final int NON_LAZY_GLOBAL = fileWideComputation();

  // Declared at cold-start, but only initialized if/when the function executes
  // Uses the "initialization-on-demand holder" idiom
  // More information: https://en.wikipedia.org/wiki/Initialization-on-demand_holder_idiom
  private static class LazyGlobalHolder {
    // Making the default constructor private prohibits instantiation of this class
    private LazyGlobalHolder() {}

    // This value is initialized only if (and when) the getLazyGlobal() function below is called
    private static final Integer INSTANCE = functionSpecificComputation();

    private static Integer getInstance() {
      return LazyGlobalHolder.INSTANCE;

  public void service(HttpRequest request, HttpResponse response)
      throws IOException {
    Integer lazyGlobal = LazyGlobalHolder.getInstance();

    var writer = new PrintWriter(response.getWriter());
    writer.printf("Lazy global: %s; non-lazy global: %s%n", lazyGlobal, NON_LAZY_GLOBAL);

  private static int functionSpecificComputation() {
    int[] numbers = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9};
    return Arrays.stream(numbers).sum();

  private static int fileWideComputation() {
    int[] numbers = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9};
    return Arrays.stream(numbers).reduce((t, x) -> t * x).getAsInt();


Knative serving 实例可以“同时”并发处理多个请求,但不超过可配置的最大并发请求数。这与使用 concurrency = 1 的 Cloud Run functions 不同。





  1. 优化您的服务性能。
  2. 设置您在任何代码级并发配置中的预期并发支持级别。并非所有技术堆栈都要求进行此设置。
  3. 部署您的服务。
  4. 为您的服务设置等于或小于任何代码级配置的 Knative serving 并发请求数。如果没有代码级配置,请使用预期并发请求数。
  5. 使用支持可配置并发的负载测试工具。您需要确认您的服务在预期的负载和并发数情况下能够保持稳定。
  6. 如果服务运行状况不佳,请转到第 1 步来改进服务,或转到第 2 步来减少并发请求数。如果服务运行状况良好,则返回第 2 步并增加并发请求数。



您的服务处理的每个请求都需要一些额外的内存。 因此,当您增加或减少并发请求数时,请务必同时调整内存限制。





许多通用软件安全做法都适用于容器化应用。 有些做法专门面向容器,或者适合容器的理念和架构。


  • 使用得到积极维护的安全基础映像,例如 Google 基础映像或 Docker Hub 的官方映像。

  • 定期重新构建容器映像并重新部署服务,以对您的服务应用安全更新。

  • 仅在容器中添加运行服务所需的内容。额外的代码、软件包和工具都有可能成为安全漏洞。请参阅上文了解相关的性能影响。

  • 实现包含特定软件和库版本的确定性构建流程。这可防止您的容器中混入未经验证的代码。

  • 使用 Dockerfile USER 语句将容器设置为以 root 之外的用户身份运行。某些容器映像可能已经配置了特定用户。


启用漏洞扫描功能,以便对存储在 Artifact Registry 中的容器映像进行安全扫描。

您还可以使用 Binary Authorization 来确保仅部署安全的容器映像。



在 Knative serving 上,容器映像的大小不会影响冷启动或请求处理时间,也不会计入容器的可用内存。


Ubuntu 是较大的基础映像,但很常用,可提供更全面的开箱即用服务器环境。

