Detecting Intent from a Stream

This page shows how to detect intent by streaming audio input to a Dialogflow agent.

This example involves the following steps:

1. Set up your GCP project and authentication.
2. Create an agent.
3. Import the example intents and entities.
4. Detect intent, as shown in the language-specific samples below.

Go

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

import (
	"context"
	"fmt"
	"io"
	"log"
	"os"

	dialogflow "cloud.google.com/go/dialogflow/apiv2"
	dialogflowpb "google.golang.org/genproto/googleapis/cloud/dialogflow/v2"
)

func DetectIntentStream(projectID, sessionID, audioFile, languageCode string) (string, error) {
	ctx := context.Background()

	sessionClient, err := dialogflow.NewSessionsClient(ctx)
	if err != nil {
		return "", err
	}
	defer sessionClient.Close()

	if projectID == "" || sessionID == "" {
		return "", fmt.Errorf("received empty project (%s) or session (%s)", projectID, sessionID)
	}

	sessionPath := fmt.Sprintf("projects/%s/agent/sessions/%s", projectID, sessionID)

	// In this example, we hard code the encoding and sample rate for simplicity.
	audioConfig := dialogflowpb.InputAudioConfig{AudioEncoding: dialogflowpb.AudioEncoding_AUDIO_ENCODING_LINEAR_16, SampleRateHertz: 16000, LanguageCode: languageCode}

	queryAudioInput := dialogflowpb.QueryInput_AudioConfig{AudioConfig: &audioConfig}

	queryInput := dialogflowpb.QueryInput{Input: &queryAudioInput}

	streamer, err := sessionClient.StreamingDetectIntent(ctx)
	if err != nil {
		return "", err
	}

	f, err := os.Open(audioFile)
	if err != nil {
		return "", err
	}

	defer f.Close()

	go func() {
		audioBytes := make([]byte, 1024)

		// The first request contains the session and the query input configuration.
		request := dialogflowpb.StreamingDetectIntentRequest{Session: sessionPath, QueryInput: &queryInput}
		if err := streamer.Send(&request); err != nil {
			log.Fatal(err)
		}

		// Subsequent requests carry only the audio content, in chunks.
		for {
			n, err := f.Read(audioBytes)
			if err == io.EOF {
				streamer.CloseSend()
				break
			}
			if err != nil {
				log.Fatal(err)
			}

			// Send only the bytes actually read from the file.
			request = dialogflowpb.StreamingDetectIntentRequest{InputAudio: audioBytes[:n]}
			if err := streamer.Send(&request); err != nil {
				log.Fatal(err)
			}
		}
	}()

	var queryResult *dialogflowpb.QueryResult

	for {
		response, err := streamer.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}

		recognitionResult := response.GetRecognitionResult()
		transcript := recognitionResult.GetTranscript()
		log.Printf("Recognition transcript: %s\n", transcript)

		queryResult = response.GetQueryResult()
	}

	fulfillmentText := queryResult.GetFulfillmentText()
	return fulfillmentText, nil
}
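
You might call this function from a small driver program. The following is a minimal, hypothetical sketch; the project ID, session ID, and audio file path are placeholder values to replace with your own:

func main() {
	// Placeholder values for illustration only.
	projectID := "my-project-id"
	sessionID := "my-session-id"
	audioFile := "./resources/book_a_room.wav"

	fulfillmentText, err := DetectIntentStream(projectID, sessionID, audioFile, "en-US")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Fulfillment text: %s\n", fulfillmentText)
}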

Java

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

import com.google.api.gax.rpc.ApiStreamObserver;
import com.google.cloud.dialogflow.v2.AudioEncoding;
import com.google.cloud.dialogflow.v2.InputAudioConfig;
import com.google.cloud.dialogflow.v2.QueryInput;
import com.google.cloud.dialogflow.v2.QueryResult;
import com.google.cloud.dialogflow.v2.SessionName;
import com.google.cloud.dialogflow.v2.SessionsClient;
import com.google.cloud.dialogflow.v2.StreamingDetectIntentRequest;
import com.google.cloud.dialogflow.v2.StreamingDetectIntentResponse;
import com.google.protobuf.ByteString;

import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;

/**
 * Returns the result of detect intent with streaming audio as input.
 *
 * Using the same `session_id` between requests allows continuation of the conversation.
 * @param projectId Project/Agent Id.
 * @param audioFilePath The audio file to be processed.
 * @param sessionId Identifier of the DetectIntent session.
 * @param languageCode Language code of the query.
 */
public static void detectIntentStream(String projectId, String audioFilePath, String sessionId,
    String languageCode) throws Throwable {
  // Start bi-directional StreamingDetectIntent stream.
  final CountDownLatch notification = new CountDownLatch(1);
  final List<Throwable> responseThrowables = new ArrayList<>();
  final List<StreamingDetectIntentResponse> responses = new ArrayList<>();

  // Instantiates a client
  try (SessionsClient sessionsClient = SessionsClient.create()) {
    // Set the session name using the sessionId (UUID) and projectID (my-project-id)
    SessionName session = SessionName.of(projectId, sessionId);
    System.out.println("Session Path: " + session.toString());

    // Note: hard coding audioEncoding and sampleRateHertz for simplicity.
    // Audio encoding of the audio content sent in the query request.
    AudioEncoding audioEncoding = AudioEncoding.AUDIO_ENCODING_LINEAR_16;
    int sampleRateHertz = 16000;

    // Instructs the speech recognizer how to process the audio content.
    InputAudioConfig inputAudioConfig = InputAudioConfig.newBuilder()
        .setAudioEncoding(audioEncoding) // audioEncoding = AudioEncoding.AUDIO_ENCODING_LINEAR_16
        .setLanguageCode(languageCode) // languageCode = "en-US"
        .setSampleRateHertz(sampleRateHertz) // sampleRateHertz = 16000
        .build();

    ApiStreamObserver<StreamingDetectIntentResponse> responseObserver =
        new ApiStreamObserver<StreamingDetectIntentResponse>() {
          @Override
          public void onNext(StreamingDetectIntentResponse response) {
            // Do something when a response is received
            responses.add(response);
          }

          @Override
          public void onError(Throwable t) {
            // Add error-handling
            responseThrowables.add(t);
          }

          @Override
          public void onCompleted() {
            // Do something when complete.
            notification.countDown();
          }
        };

    // Performs the streaming detect intent callable request
    ApiStreamObserver<StreamingDetectIntentRequest> requestObserver =
        sessionsClient.streamingDetectIntentCallable().bidiStreamingCall(responseObserver);

    // Build the query with the InputAudioConfig
    QueryInput queryInput = QueryInput.newBuilder().setAudioConfig(inputAudioConfig).build();

    try (FileInputStream audioStream = new FileInputStream(audioFilePath)) {
      // The first request contains the configuration
      StreamingDetectIntentRequest request = StreamingDetectIntentRequest.newBuilder()
          .setSession(session.toString())
          .setQueryInput(queryInput)
          .build();

      // Make the first request
      requestObserver.onNext(request);

      // Following messages: audio chunks. We just read the file in fixed-size chunks. In reality
      // you would split the user input by time.
      byte[] buffer = new byte[4096];
      int bytes;
      while ((bytes = audioStream.read(buffer)) != -1) {
        requestObserver.onNext(
            StreamingDetectIntentRequest.newBuilder()
                .setInputAudio(ByteString.copyFrom(buffer, 0, bytes))
                .build());
      }
    } catch (RuntimeException e) {
      // Cancel stream.
      requestObserver.onError(e);
    }
    // Half-close the stream.
    requestObserver.onCompleted();
    // Wait for the final response (without explicit timeout).
    notification.await();
    // Process errors/responses.
    if (!responseThrowables.isEmpty()) {
      throw responseThrowables.get(0);
    }
    if (responses.isEmpty()) {
      throw new RuntimeException("No response from Dialogflow.");
    }

    for (StreamingDetectIntentResponse response : responses) {
      if (response.hasRecognitionResult()) {
        System.out.format(
            "Intermediate transcript: '%s'\n", response.getRecognitionResult().getTranscript());
      }
    }

    // Display the last query result
    QueryResult queryResult = responses.get(responses.size() - 1).getQueryResult();
    System.out.println("====================");
    System.out.format("Query Text: '%s'\n", queryResult.getQueryText());
    System.out.format("Detected Intent: %s (confidence: %f)\n",
        queryResult.getIntent().getDisplayName(), queryResult.getIntentDetectionConfidence());
    System.out.format("Fulfillment Text: '%s'\n", queryResult.getFulfillmentText());
  }
}

Node.js

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

// Imports the Dialogflow library
const dialogflow = require('dialogflow');

// Imports libraries used to stream the audio file
const fs = require('fs');
const pump = require('pump');
const through2 = require('through2');

// Instantiates a session client
const sessionClient = new dialogflow.SessionsClient();

// The path to the local file on which to perform speech recognition, e.g.
// const filename = '/path/to/audio.raw';

// The encoding of the audio file, e.g. 'AUDIO_ENCODING_LINEAR_16'
// const encoding = 'AUDIO_ENCODING_LINEAR_16';

// The sample rate of the audio file in hertz, e.g. 16000
// const sampleRateHertz = 16000;

// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';
// The Dialogflow project ID and session ID, e.g.
// const projectId = 'my-project-id';
// const sessionId = 'my-session-id';
const sessionPath = sessionClient.sessionPath(projectId, sessionId);

const initialStreamRequest = {
  session: sessionPath,
  queryInput: {
    audioConfig: {
      audioEncoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode,
    },
  },
  singleUtterance: true,
};

// Create a stream for the streaming request.
const detectStream = sessionClient
  .streamingDetectIntent()
  .on('error', console.error)
  .on('data', data => {
    if (data.recognitionResult) {
      console.log(
        `Intermediate transcript: ${data.recognitionResult.transcript}`
      );
    } else {
      console.log(`Detected intent:`);
      logQueryResult(sessionClient, data.queryResult);
    }
  });

// Write the initial stream request to configure the audio input.
detectStream.write(initialStreamRequest);

// Stream an audio file from disk to the Dialogflow API, e.g.
// "./resources/audio.raw"
pump(
  fs.createReadStream(filename),
  // Format the audio stream into the request format.
  through2.obj((obj, _, next) => {
    next(null, {inputAudio: obj});
  }),
  detectStream
);

PHP

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

namespace Google\Cloud\Samples\Dialogflow;

use Google\Cloud\Dialogflow\V2\SessionsClient;
use Google\Cloud\Dialogflow\V2\AudioEncoding;
use Google\Cloud\Dialogflow\V2\InputAudioConfig;
use Google\Cloud\Dialogflow\V2\QueryInput;
use Google\Cloud\Dialogflow\V2\StreamingDetectIntentRequest;

/**
* Returns the result of detect intent with streaming audio as input.
* Using the same `session_id` between requests allows continuation
* of the conversation.
*/
function detect_intent_stream($projectId, $path, $sessionId, $languageCode = 'en-US')
{
    // need to use gRPC
    if (!defined('Grpc\STATUS_OK')) {
        throw new \Exception('Install the grpc extension ' .
            '(pecl install grpc)');
    }

    // new session
    $sessionsClient = new SessionsClient();
    $session = $sessionsClient->sessionName($projectId, $sessionId ?: uniqid());
    printf('Session path: %s' . PHP_EOL, $session);

    // hard coding audio_encoding and sample_rate_hertz for simplicity
    $audioConfig = new InputAudioConfig();
    $audioConfig->setAudioEncoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
    $audioConfig->setLanguageCode($languageCode);
    $audioConfig->setSampleRateHertz(16000);

    // create query input
    $queryInput = new QueryInput();
    $queryInput->setAudioConfig($audioConfig);

    // first request contains the configuration
    $request = new StreamingDetectIntentRequest();
    $request->setSession($session);
    $request->setQueryInput($queryInput);
    $requests = [$request];

    // we are going to read small chunks of audio data from
    // a local audio file. in practice, these chunks should
    // come from an audio input device.
    $audioStream = fopen($path, 'rb');
    while (true) {
        $chunk = stream_get_contents($audioStream, 4096);
        if (!$chunk) {
            break;
        }
        $request = new StreamingDetectIntentRequest();
        $request->setInputAudio($chunk);
        $requests[] = $request;
    }

    // intermediate transcript info
    print(PHP_EOL . str_repeat("=", 20) . PHP_EOL);
    $stream = $sessionsClient->streamingDetectIntent();
    foreach ($requests as $request) {
        $stream->write($request);
    }
    foreach ($stream->closeWriteAndReadAll() as $response) {
        $recognitionResult = $response->getRecognitionResult();
        if ($recognitionResult) {
            $transcript = $recognitionResult->getTranscript();
            printf('Intermediate transcript: %s' . PHP_EOL, $transcript);
        }
    }
    print(str_repeat("=", 20) . PHP_EOL);

    // the $response left over from the loop above is the final
    // response, which contains the query result
    $queryResult = $response->getQueryResult();
    $queryText = $queryResult->getQueryText();
    $intent = $queryResult->getIntent();
    $displayName = $intent->getDisplayName();
    $confidence = $queryResult->getIntentDetectionConfidence();
    $fulfillmentText = $queryResult->getFulfillmentText();

    // output relevant info
    printf('Query text: %s' . PHP_EOL, $queryText);
    printf('Detected intent: %s (confidence: %f)' . PHP_EOL, $displayName,
        $confidence);
    print(PHP_EOL);
    printf('Fulfillment text: %s' . PHP_EOL, $fulfillmentText);

    $sessionsClient->close();
}

Python

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

def detect_intent_stream(project_id, session_id, audio_file_path,
                         language_code):
    """Returns the result of detect intent with streaming audio as input.

    Using the same `session_id` between requests allows continuation
    of the conversation."""
    import dialogflow_v2 as dialogflow
    session_client = dialogflow.SessionsClient()

    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000

    session_path = session_client.session_path(project_id, session_id)
    print('Session path: {}\n'.format(session_path))

    def request_generator(audio_config, audio_file_path):
        query_input = dialogflow.types.QueryInput(audio_config=audio_config)

        # The first request contains the configuration.
        yield dialogflow.types.StreamingDetectIntentRequest(
            session=session_path, query_input=query_input)

        # Here we are reading small chunks of audio data from a local
        # audio file.  In practice these chunks should come from
        # an audio input device.
        with open(audio_file_path, 'rb') as audio_file:
            while True:
                chunk = audio_file.read(4096)
                if not chunk:
                    break
                # The later requests contain audio data.
                yield dialogflow.types.StreamingDetectIntentRequest(
                    input_audio=chunk)

    audio_config = dialogflow.types.InputAudioConfig(
        audio_encoding=audio_encoding, language_code=language_code,
        sample_rate_hertz=sample_rate_hertz)

    requests = request_generator(audio_config, audio_file_path)
    responses = session_client.streaming_detect_intent(requests)

    print('=' * 20)
    for response in responses:
        print('Intermediate transcript: "{}".'.format(
                response.recognition_result.transcript))

    # Note: The result from the last response is the final transcript along
    # with the detected content.
    query_result = response.query_result

    print('=' * 20)
    print('Query text: {}'.format(query_result.query_text))
    print('Detected intent: {} (confidence: {})\n'.format(
        query_result.intent.display_name,
        query_result.intent_detection_confidence))
    print('Fulfillment text: {}\n'.format(
        query_result.fulfillment_text))

Ruby

For more on installing and creating a Dialogflow client, refer to Dialogflow Client Libraries.

# project_id = "Your Google Cloud project ID"
# session_id = "mysession"
# audio_file_path = "resources/book_a_room.wav"
# language_code = "en-US"

require "google/cloud/dialogflow"
require "monitor"

session_client = Google::Cloud::Dialogflow::Sessions.new
session = session_client.class.session_path project_id, session_id
puts "Session path: #{session}"

audio_config = {
  audio_encoding: :AUDIO_ENCODING_LINEAR_16,
  sample_rate_hertz: 16000,
  language_code: language_code
}
query_input = { audio_config: audio_config }
streaming_config = { session: session, query_input: query_input }

# To signal the main thread when all responses have been processed
completed = false

# Use session_client as the sentinel to signal the end of the queue.
# EnumeratorQueue is a helper class (from the gRPC Ruby samples) that
# exposes a thread-safe queue as an Enumerator.
request_queue = EnumeratorQueue.new(session_client)

# The first request needs to be the configuration.
request_queue.push(streaming_config)

# Consume the queue and process responses in a separate thread
Thread.new do
  session_client.streaming_detect_intent(request_queue.each_item).each do |response|
    if response.recognition_result
      puts "Intermediate transcript: #{response.recognition_result.transcript}\n"
    else
      # the last response has the actual query result
      query_result = response.query_result
      puts "Query text:        #{query_result.query_text}"
      puts "Intent detected:   #{query_result.intent.display_name}"
      puts "Intent confidence: #{query_result.intent_detection_confidence}"
      puts "Fulfillment text:  #{query_result.fulfillment_text}\n"
    end
  end
  completed = true
end

# While the main thread adds chunks of audio data to the queue
begin
  audio_file = File.open(audio_file_path, "rb")
  while true
    chunk = audio_file.read 4096
    break unless chunk
    request_queue.push({ input_audio: chunk })
    sleep 0.5
  end
ensure
  audio_file.close
  # Push the sentinel session_client to end the streaming queue
  request_queue.push(session_client)
end

# Do not exit the main thread until the processing thread has completed
sleep 1 until completed
