Detecting Intent from Audio

Here is an example of detecting intent by sending audio to a Dialogflow agent.

Set up your GCP project and authentication

Create an agent

Import the example intents and entities

Detect intent

Protocol

After you have imported the sample agent, you can now pass audio user input to the detectIntent endpoint to determine what the user has requested and which action to take. The audio must be base64 encoded. For information on how to encode and decode audio using base64 encoding, see Embedding Base64 encoded audio in the Cloud Speech API documentation.

Follow these steps to determine the intent from base64 encoded audio. In this example, the audio says "book a room".

  1. Use the following curl command to access the detectIntent endpoint and specify base64-encoded audio. In this example, the audio says "book a room". Replace project-name with the name of your Google Cloud project. The command uses the gcloud command-line interface from the Google Cloud SDK to get an authorization token for the command. For instructions on setting up authorization, see Quickstart.

    curl -H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
         -H "Content-Type: application/json; charset=utf-8" --data "{
      'queryInput': {
        'audioConfig': {
          'languageCode': 'en-US'
        }
      },
      'inputAudio': 'UklGRkp5AABXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAARkxMUsw...'
    }" "https://dialogflow.googleapis.com/v2/projects/project-name/agent/sessions/123456789:detectIntent"
      

    You should see a response similar to the following. Notice that the action is room.reservation, and fulfillmentText asks the user for more information.

    {
      "responseId": "3c1e5a89-75b9-4c3f-b63d-4b1351dd5e32",
      "queryResult": {
        "queryText": "book a room",
        "action": "room.reservation",
        "parameters": {
          "time": "",
          "date": "",
          "guests": "",
          "duration": "",
          "location": ""
        },
        "fulfillmentText": "I can help with that. Where would you like to reserve a room?",
        "fulfillmentMessages": [
          {
            "text": {
              "text": [
                "I can help with that. Where would you like to reserve a room?"
              ]
            },
            "platform": "FACEBOOK"
          },
          {
            "text": {
              "text": [
                "I can help with that. Where would you like to reserve a room?"
              ]
            }
          }
        ],
        "outputContexts": [
          {
            "name": "projects/drothaus-cce/agent/sessions/123456789/contexts/e8f6a63e-73da-4a1a-8bfc-857183f71228_id_dialog_context",
            "lifespanCount": 2,
            "parameters": {
              "date": "",
              "guests": "",
              "duration": "",
              "location.original": "",
              "guests.original": "",
              "location": "",
              "date.original": "",
              "time.original": "",
              "time": "",
              "duration.original": ""
            }
          },
          {
            "name": "projects/drothaus-cce/agent/sessions/123456789/contexts/room_reservation_dialog_params_location",
            "lifespanCount": 1,
            "parameters": {
              "date.original": "",
              "time.original": "",
              "time": "",
              "duration.original": "",
              "date": "",
              "guests": "",
              "duration": "",
              "location.original": "",
              "guests.original": "",
              "location": ""
            }
          },
          {
            "name": "projects/drothaus-cce/agent/sessions/123456789/contexts/room_reservation_dialog_context",
            "lifespanCount": 2,
            "parameters": {
              "time.original": "",
              "time": "",
              "duration.original": "",
              "date": "",
              "guests": "",
              "duration": "",
              "location.original": "",
              "guests.original": "",
              "location": "",
              "date.original": ""
            }
          }
        ],
        "intent": {
          "name": "projects/drothaus-cce/agent/intents/e8f6a63e-73da-4a1a-8bfc-857183f71228",
          "displayName": "room.reservation"
        },
        "intentDetectionConfidence": 1,
        "diagnosticInfo": {},
        "languageCode": "en-us"
      }
    }
      

Go

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

// DetectIntentAudio sends the contents of the audio file at audioFile to the
// Dialogflow agent owned by projectID and returns the fulfillment text of the
// detected intent.
//
// The audio is hard coded as LINEAR16 at 16 kHz for simplicity; languageCode
// is a BCP-47 code such as "en-US". Reusing the same sessionID across calls
// keeps the requests in one conversation.
func DetectIntentAudio(projectID, sessionID, audioFile, languageCode string) (string, error) {
	// Validate arguments before paying the cost of creating a client.
	if projectID == "" || sessionID == "" {
		return "", fmt.Errorf("received empty project (%q) or session (%q)", projectID, sessionID)
	}

	ctx := context.Background()

	sessionClient, err := dialogflow.NewSessionsClient(ctx)
	if err != nil {
		return "", err
	}
	defer sessionClient.Close()

	sessionPath := fmt.Sprintf("projects/%s/agent/sessions/%s", projectID, sessionID)

	// In this example, we hard code the encoding and sample rate for simplicity.
	audioConfig := dialogflowpb.InputAudioConfig{AudioEncoding: dialogflowpb.AudioEncoding_AUDIO_ENCODING_LINEAR_16, SampleRateHertz: 16000, LanguageCode: languageCode}

	queryAudioInput := dialogflowpb.QueryInput_AudioConfig{AudioConfig: &audioConfig}

	audioBytes, err := ioutil.ReadFile(audioFile)
	if err != nil {
		return "", err
	}

	queryInput := dialogflowpb.QueryInput{Input: &queryAudioInput}
	request := dialogflowpb.DetectIntentRequest{Session: sessionPath, QueryInput: &queryInput, InputAudio: audioBytes}

	response, err := sessionClient.DetectIntent(ctx, &request)
	if err != nil {
		return "", err
	}

	return response.GetQueryResult().GetFulfillmentText(), nil
}

Java

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

/**
 * Detects the intent of a spoken query stored in an audio file and prints
 * the result to standard output.
 *
 * <p>Reusing the same {@code sessionId} across requests keeps them in one
 * conversation.
 *
 * @param projectId Project/Agent Id.
 * @param audioFilePath Path to the audio file.
 * @param sessionId Identifier of the DetectIntent session.
 * @param languageCode Language code of the query, e.g. "en-US".
 */
public static void detectIntentAudio(String projectId, String audioFilePath, String sessionId,
    String languageCode)
    throws Exception {
  // The client is AutoCloseable; try-with-resources releases it when done.
  try (SessionsClient sessionsClient = SessionsClient.create()) {
    // Build the session name from the project id and session id (UUID).
    SessionName session = SessionName.of(projectId, sessionId);
    System.out.println("Session Path: " + session.toString());

    // This sample hard codes LINEAR16 audio at 16 kHz for simplicity; the
    // config tells the speech recognizer how to process the audio bytes.
    InputAudioConfig audioConfig =
        InputAudioConfig.newBuilder()
            .setAudioEncoding(AudioEncoding.AUDIO_ENCODING_LINEAR_16)
            .setLanguageCode(languageCode)
            .setSampleRateHertz(16000)
            .build();

    // Wrap the audio configuration in a query input.
    QueryInput queryInput = QueryInput.newBuilder().setAudioConfig(audioConfig).build();

    // Load the raw audio bytes from disk.
    byte[] audioBytes = Files.readAllBytes(Paths.get(audioFilePath));

    // Assemble the DetectIntent request from the session, config, and audio.
    DetectIntentRequest request =
        DetectIntentRequest.newBuilder()
            .setSession(session.toString())
            .setQueryInput(queryInput)
            .setInputAudio(ByteString.copyFrom(audioBytes))
            .build();

    // Send the request to the Dialogflow agent.
    DetectIntentResponse response = sessionsClient.detectIntent(request);

    // Print the interesting parts of the query result.
    QueryResult queryResult = response.getQueryResult();
    System.out.println("====================");
    System.out.format("Query Text: '%s'\n", queryResult.getQueryText());
    System.out.format("Detected Intent: %s (confidence: %f)\n",
        queryResult.getIntent().getDisplayName(), queryResult.getIntentDetectionConfidence());
    System.out.format("Fulfillment Text: '%s'\n", queryResult.getFulfillmentText());
  }
}

Node.js

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

// Imports the Dialogflow library
const dialogflow = require('dialogflow');

// Instantiates a session client
const sessionClient = new dialogflow.SessionsClient();

// The path to identify the agent that owns the created intent.
const sessionPath = sessionClient.sessionPath(projectId, sessionId);

// Read the content of the audio file and send it as part of the request.
// fs.readFile is callback-based; wrap it in a Promise so the rest of the
// flow can be chained with .then/.catch.
const readFile = file =>
  new Promise((resolve, reject) => {
    fs.readFile(file, (err, data) => (err ? reject(err) : resolve(data)));
  });
readFile(filename)
  .then(inputAudio => {
    // The audio query request
    const request = {
      session: sessionPath,
      queryInput: {
        audioConfig: {
          audioEncoding: encoding,
          sampleRateHertz: sampleRateHertz,
          languageCode: languageCode,
        },
      },
      inputAudio: inputAudio,
    };
    // Recognizes the speech in the audio and detects its intent.
    return sessionClient.detectIntent(request);
  })
  .then(responses => {
    console.log('Detected intent:');
    logQueryResult(sessionClient, responses[0].queryResult);
  })
  .catch(err => {
    console.error('ERROR:', err);
  });

PHP

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

namespace Google\Cloud\Samples\Dialogflow;

use Google\Cloud\Dialogflow\V2\SessionsClient;
use Google\Cloud\Dialogflow\V2\AudioEncoding;
use Google\Cloud\Dialogflow\V2\InputAudioConfig;
use Google\Cloud\Dialogflow\V2\QueryInput;

/**
* Detects the intent of an audio file and prints the result.
* Using the same `session_id` between requests allows continuation
* of the conversation.
*
* @param string $projectId     GCP project that owns the Dialogflow agent.
* @param string $path          Path to the audio file to send as the query.
* @param string $sessionId     Identifier of the DetectIntent session.
* @param string $languageCode  BCP-47 language code of the query.
*/
function detect_intent_audio($projectId, $path, $sessionId, $languageCode = 'en-US')
{
    // new session
    $sessionsClient = new SessionsClient();
    // finally guarantees the client is closed even if detectIntent throws
    try {
        $session = $sessionsClient->sessionName($projectId, $sessionId ?: uniqid());
        printf('Session path: %s' . PHP_EOL, $session);

        // load audio file
        $inputAudio = file_get_contents($path);

        // hard coding audio_encoding and sample_rate_hertz for simplicity
        $audioConfig = new InputAudioConfig();
        $audioConfig->setAudioEncoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
        $audioConfig->setLanguageCode($languageCode);
        $audioConfig->setSampleRateHertz(16000);

        // create query input
        $queryInput = new QueryInput();
        $queryInput->setAudioConfig($audioConfig);

        // get response and relevant info
        $response = $sessionsClient->detectIntent($session, $queryInput, ['inputAudio' => $inputAudio]);
        $queryResult = $response->getQueryResult();
        $queryText = $queryResult->getQueryText();
        $intent = $queryResult->getIntent();
        $displayName = $intent->getDisplayName();
        $confidence = $queryResult->getIntentDetectionConfidence();
        $fulfilmentText = $queryResult->getFulfillmentText();

        // output relevant info
        print(str_repeat("=", 20) . PHP_EOL);
        printf('Query text: %s' . PHP_EOL, $queryText);
        printf('Detected intent: %s (confidence: %f)' . PHP_EOL, $displayName,
            $confidence);
        print(PHP_EOL);
        printf('Fulfilment text: %s' . PHP_EOL, $fulfilmentText);
    } finally {
        $sessionsClient->close();
    }
}

Python

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

def detect_intent_audio(project_id, session_id, audio_file_path,
                        language_code):
    """Returns the result of detect intent with an audio file as input.

    Using the same `session_id` between requests allows continuation
    of the conversation.

    Args:
        project_id: GCP project ID that owns the Dialogflow agent.
        session_id: Identifier of the DetectIntent session.
        audio_file_path: Path to the audio file sent as the query.
        language_code: BCP-47 language code of the query, e.g. 'en-US'.
    """
    session_client = dialogflow.SessionsClient()

    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000

    session = session_client.session_path(project_id, session_id)
    print('Session path: {}\n'.format(session))

    # Read the raw audio bytes; 'rb' because the payload is binary.
    with open(audio_file_path, 'rb') as audio_file:
        input_audio = audio_file.read()

    # The config tells the speech recognizer how to process the audio.
    audio_config = dialogflow.types.InputAudioConfig(
        audio_encoding=audio_encoding, language_code=language_code,
        sample_rate_hertz=sample_rate_hertz)
    query_input = dialogflow.types.QueryInput(audio_config=audio_config)

    response = session_client.detect_intent(
        session=session, query_input=query_input,
        input_audio=input_audio)

    print('=' * 20)
    print('Query text: {}'.format(response.query_result.query_text))
    print('Detected intent: {} (confidence: {})\n'.format(
        response.query_result.intent.display_name,
        response.query_result.intent_detection_confidence))
    print('Fulfillment text: {}\n'.format(
        response.query_result.fulfillment_text))

Ruby

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

# project_id = "Your Google Cloud project ID"
# session_id = "mysession"
# audio_file_path = "resources/book_a_room.wav"
# language_code = "en-US"

require "google/cloud/dialogflow"

session_client = Google::Cloud::Dialogflow::Sessions.new
session = session_client.class.session_path project_id, session_id
puts "Session path: #{session}"

# File.binread opens the file in binary mode, reads it, and always closes it.
# The previous begin/ensure version called `audio_file.close` on nil when
# File.open itself raised, masking the original error with a NoMethodError.
input_audio = File.binread audio_file_path

# Hard code the encoding and sample rate for simplicity.
audio_config = {
  audio_encoding: :AUDIO_ENCODING_LINEAR_16,
  sample_rate_hertz: 16000,
  language_code: language_code
}

query_input = { audio_config: audio_config }

response = session_client.detect_intent session, query_input, input_audio: input_audio
query_result = response.query_result

puts "Query text:        #{query_result.query_text}"
puts "Intent detected:   #{query_result.intent.display_name}"
puts "Intent confidence: #{query_result.intent_detection_confidence}"
puts "Fulfillment text:  #{query_result.fulfillment_text}"

Was this page helpful? Let us know how we did:

Send feedback about...

Dialogflow Enterprise Edition Documentation