您可以使用 OpenAI 库(Python 和 TypeScript/JavaScript)以及 REST API 来访问 Gemini 模型。在 Vertex AI 中使用 OpenAI 库时,仅支持 Google Cloud 身份验证。如果您尚未使用 OpenAI 库,我们建议您直接调用 Gemini API。
Python
import google.auth.transport.requests
import openai
from google.auth import default

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Obtain Google Cloud credentials and refresh them so an access token can be
# read from `credentials.token` below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# OpenAI client: `base_url` redirects requests to Google Cloud instead of the
# library's default URL, and the access token is passed as the API key.
vertex_endpoint = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi"
client = openai.OpenAI(base_url=vertex_endpoint, api_key=credentials.token)

conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain to me how AI works"},
]
response = client.chat.completions.create(
    model="google/gemini-2.0-flash-001",
    messages=conversation,
)
print(response.choices[0].message)
有哪些变化?
api_key=credentials.token:如需使用 Google Cloud 身份验证,请使用示例代码获取 Google Cloud 身份验证令牌。
base_url:这会告知 OpenAI 库将请求发送到 Google Cloud,而不是默认网址。
model="google/gemini-2.0-flash-001":从 Vertex 托管的模型中选择兼容的 Gemini 模型。
思考型
Gemini 2.5 系列模型经过训练,能够思考复杂问题,从而大幅提升推理能力。Gemini API 附带一个“思考预算”参数,可用于精细控制模型的思考量。
与 Gemini API 不同,OpenAI API 提供三个级别的思考控制:“低”“中”和“高”,这些级别在后台分别对应 1K、8K 和 24K 个令牌的思考预算。
如需停用思考,请将推理努力程度(reasoning_effort)设置为 None。
Python
import google.auth.transport.requests
import openai
from google.auth import default

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Obtain Google Cloud credentials and refresh them so an access token can be
# read from `credentials.token` below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# OpenAI client configured to send requests to Google Cloud.
vertex_endpoint = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi"
client = openai.OpenAI(base_url=vertex_endpoint, api_key=credentials.token)

conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain to me how AI works"},
]

# `reasoning_effort` controls how much the model thinks before answering.
response = client.chat.completions.create(
    model="google/gemini-2.5-flash-preview-04-17",
    reasoning_effort="low",
    messages=conversation,
)
print(response.choices[0].message)
流式
Gemini API 支持流式回答。
Python
import openai
from google.auth import default
import google.auth.transport.requests

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Obtain Google Cloud credentials and refresh them so an access token can be
# read from `credentials.token` below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI client configured to send requests to Google Cloud.
client = openai.OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)

# stream=True makes the call return an iterable of chunks instead of a single
# completed response.
response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    stream=True,
)

# The loop body must be indented; print each incremental delta as it arrives.
for chunk in response:
    print(chunk.choices[0].delta)
函数调用
函数调用功能可让您更轻松地从生成式模型获取结构化数据输出,并且在 Gemini API 中受支持。
Python
import google.auth.transport.requests
import openai
from google.auth import default

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Obtain Google Cloud credentials and refresh them so an access token can be
# read from `credentials.token` below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# OpenAI client configured to send requests to Google Cloud.
vertex_endpoint = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi"
client = openai.OpenAI(base_url=vertex_endpoint, api_key=credentials.token)

# JSON-schema description of the arguments accepted by `get_weather`.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. Chicago, IL",
        },
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location"],
}

# Single function tool offered to the model.
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the weather in a given location",
        "parameters": weather_parameters,
    },
}
tools = [get_weather_tool]

messages = [{"role": "user", "content": "What's the weather like in Chicago today?"}]

response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)
print(response)
图片理解
Gemini 模型是原生多模态模型,可在许多常见的视觉任务中提供出色的性能。
Python
from google.auth import default
import google.auth.transport.requests
import base64
from openai import OpenAI

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token: obtain Google Cloud credentials and
# refresh them so `credentials.token` can be read below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI client configured to send requests to Google Cloud.
# Only the `OpenAI` class is imported above (the module name `openai` is never
# bound), so the class is referenced directly; `openai.OpenAI` would raise
# NameError.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` via UTF-8.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# TODO(developer): Un-comment the line below and point it at a real JPEG file;
# `base64_image` is interpolated into the data URL further down.
# base64_image = encode_image("Path/to/image.jpeg")

# The user message carries two content parts: the question and the image,
# the latter embedded inline as a base64 data URL.
question_part = {
    "type": "text",
    "text": "What is in this image?",
}
image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/jpeg;base64,{base64_image}"
    },
}

response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[{"role": "user", "content": [question_part, image_part]}],
)
print(response.choices[0])
生成图片
Python
from google.auth import default
import google.auth.transport.requests
import base64
from openai import OpenAI

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token: obtain Google Cloud credentials and
# refresh them so `credentials.token` can be read below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI client configured to send requests to Google Cloud.
# Only the `OpenAI` class is imported above (the module name `openai` is never
# bound), so the class is referenced directly; `openai.OpenAI` would raise
# NameError.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` via UTF-8.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Getting the base64 string (generic form of the call, kept for reference):
# base64_image = encode_image("Path/to/image.jpeg")
base64_image = encode_image("/content/wayfairsofa.jpg")

# The user message carries two content parts: the question and the image,
# the latter embedded inline as a base64 data URL.
question_part = {
    "type": "text",
    "text": "What is in this image?",
}
image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/jpeg;base64,{base64_image}"
    },
}

response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[{"role": "user", "content": [question_part, image_part]}],
)
print(response.choices[0])
音频理解
分析音频输入:
Python
from google.auth import default
import google.auth.transport.requests
import base64
from openai import OpenAI

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token: obtain Google Cloud credentials and
# refresh them so `credentials.token` can be read below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI client configured to send requests to Google Cloud.
# Only the `OpenAI` class is imported above (the module name `openai` is never
# bound), so the class is referenced directly; `openai.OpenAI` would raise
# NameError.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)

# Read the audio file in binary mode and base64-encode it for inline upload.
with open("/path/to/your/audio/file.wav", "rb") as audio_file:
    base64_audio = base64.b64encode(audio_file.read()).decode('utf-8')

# The model id carries the "google/" prefix, matching the other samples on
# this page.
response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Transcribe this audio",
                },
                {
                    "type": "input_audio",
                    "input_audio": {
                        "data": base64_audio,
                        "format": "wav",
                    },
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
结构化输出
Gemini 模型可以输出采用您定义的任何结构的 JSON 对象。
Python
from google.auth import default
import google.auth.transport.requests
from pydantic import BaseModel
from openai import OpenAI

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token: obtain Google Cloud credentials and
# refresh them so `credentials.token` can be read below.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI client configured to send requests to Google Cloud.
# Only the `OpenAI` class is imported above (the module name `openai` is never
# bound), so the class is referenced directly; `openai.OpenAI` would raise
# NameError.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)
# Structure of the JSON object requested from the model; passed as
# `response_format` in the request below. Documented with comments rather than
# a docstring so the schema pydantic generates from this class is unchanged.
class CalendarEvent(BaseModel):
    # Event name.
    name: str
    # Event date, kept as a plain string.
    date: str
    # Names of the participants.
    participants: list[str]
# Ask the model to extract the event and parse the reply into CalendarEvent.
extraction_messages = [
    {"role": "system", "content": "Extract the event information."},
    {"role": "user", "content": "John and Susan are going to an AI conference on Friday."},
]
completion = client.beta.chat.completions.parse(
    model="google/gemini-2.0-flash",
    messages=extraction_messages,
    response_format=CalendarEvent,
)

# `.parsed` holds the reply parsed according to `response_format`.
first_choice = completion.choices[0]
print(first_choice.message.parsed)
当前限制
默认情况下,访问令牌的有效期为 1 小时。过期后,必须刷新。如需了解详情,请参阅此代码示例。
在我们扩展功能支持的同时,对 OpenAI 库的支持仍处于预览版阶段。如果您有任何疑问或遇到任何问题,请在 Google Cloud 社区中发帖求助。
后续步骤
使用 Google Gen AI 库发掘 Gemini 的潜力。
查看更多使用 OpenAI 兼容语法调用 Chat Completions API 的示例。
如需了解哪些 Gemini 模型和参数受支持,请参阅概览页面。