blob: 623d03b2572f28b17c08aa43ae8755f7225c4c91 [file] [log] [blame]
// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include <string>
6#include <utility>
7
8#include "ml/soda_proto_mojom_conversion.h"
9
10using chromeos::machine_learning::mojom::EndpointerType;
11using speech::soda::chrome::SodaEndpointEvent;
12using speech::soda::chrome::SodaRecognitionResult;
13using speech::soda::chrome::SodaResponse;
14
15namespace ml {
16
17chromeos::machine_learning::mojom::SpeechRecognizerEventPtr
18SpeechRecognizerEventFromProto(const SodaResponse& soda_response) {
19 auto speech_recognizer_event =
20 chromeos::machine_learning::mojom::SpeechRecognizerEvent::New();
21 if (soda_response.soda_type() == SodaResponse::AUDIO_LEVEL) {
22 auto audio_level_event = internal::AudioLevelEventFromProto(soda_response);
23 speech_recognizer_event->set_audio_event(std::move(audio_level_event));
24 } else if (soda_response.soda_type() == SodaResponse::RECOGNITION) {
25 const auto& rec_result = soda_response.recognition_result();
26 if (rec_result.result_type() == SodaRecognitionResult::PARTIAL) {
27 speech_recognizer_event->set_partial_result(
28 internal::PartialResultFromProto(soda_response));
29 } else if (rec_result.result_type() == SodaRecognitionResult::FINAL) {
30 speech_recognizer_event->set_final_result(
31 internal::FinalResultFromProto(soda_response));
Rob Schonberger828bcff2021-01-13 10:44:01 +110032 } else if (rec_result.result_type() == SodaRecognitionResult::PREFETCH) {
33 speech_recognizer_event->set_partial_result(
34 internal::PartialResultFromPrefetchProto(soda_response));
Rob Schonberger07ee1232020-10-12 17:50:06 +110035 } else {
Rob Schonberger828bcff2021-01-13 10:44:01 +110036 LOG(ERROR) << "Only partial/prefetch/final results are supported, not "
37 << speech::soda::chrome::SodaRecognitionResult_ResultType_Name(
38 rec_result.result_type());
Rob Schonberger07ee1232020-10-12 17:50:06 +110039 }
40 } else if (soda_response.soda_type() == SodaResponse::ENDPOINT) {
41 speech_recognizer_event->set_endpointer_event(
42 internal::EndpointerEventFromProto(soda_response));
43 } else {
44 LOG(DFATAL) << "Unexpected type of soda type to convert: "
45 << speech::soda::chrome::SodaResponse_SodaMessageType_Name(
46 soda_response.soda_type());
47 }
48 return speech_recognizer_event;
49}
50
51bool IsStopSodaResponse(const SodaResponse& soda_response) {
52 return soda_response.soda_type() == SodaResponse::STOP;
53}
54bool IsStartSodaResponse(const SodaResponse& soda_response) {
55 return soda_response.soda_type() == SodaResponse::START;
56}
57
58bool IsShutdownSodaResponse(const SodaResponse& soda_response) {
59 return soda_response.soda_type() == SodaResponse::SHUTDOWN;
60}
61
62namespace internal {
63chromeos::machine_learning::mojom::AudioLevelEventPtr AudioLevelEventFromProto(
64 const SodaResponse& soda_response) {
65 auto audio_level_event =
66 chromeos::machine_learning::mojom::AudioLevelEvent::New();
67 if (!soda_response.has_audio_level_info()) {
68 LOG(DFATAL) << "Should only call this method if audio level info is set.";
69 return audio_level_event;
70 }
71 const auto& audio_level_info = soda_response.audio_level_info();
72 audio_level_event->rms = audio_level_info.rms();
73 audio_level_event->audio_level = audio_level_info.audio_level();
74
75 // TODO(robsc): add support for time here.
76 return audio_level_event;
77}
78
Rob Schonberger828bcff2021-01-13 10:44:01 +110079chromeos::machine_learning::mojom::PartialResultPtr
80PartialResultFromPrefetchProto(
81 const speech::soda::chrome::SodaResponse& soda_response) {
82 auto partial_result = chromeos::machine_learning::mojom::PartialResult::New();
83 if (!soda_response.has_recognition_result() ||
84 soda_response.soda_type() != SodaResponse::RECOGNITION ||
85 soda_response.recognition_result().result_type() !=
86 SodaRecognitionResult::PREFETCH) {
87 LOG(DFATAL) << "Should only be called when there's a prefetch result.";
88 }
89 for (const std::string& hyp :
90 soda_response.recognition_result().hypothesis()) {
91 partial_result->partial_text.push_back(hyp);
92 }
93 return partial_result;
94}
95
Rob Schonberger07ee1232020-10-12 17:50:06 +110096chromeos::machine_learning::mojom::PartialResultPtr PartialResultFromProto(
97 const SodaResponse& soda_response) {
98 auto partial_result = chromeos::machine_learning::mojom::PartialResult::New();
99 if (!soda_response.has_recognition_result() ||
100 soda_response.soda_type() != SodaResponse::RECOGNITION ||
101 soda_response.recognition_result().result_type() !=
102 SodaRecognitionResult::PARTIAL) {
103 LOG(DFATAL)
104 << "Should only call when there's a partial recognition result.";
105 return partial_result;
106 }
107 for (const std::string& hyp :
108 soda_response.recognition_result().hypothesis()) {
109 partial_result->partial_text.push_back(hyp);
110 }
111 return partial_result;
112}
113
114chromeos::machine_learning::mojom::FinalResultPtr FinalResultFromProto(
115 const SodaResponse& soda_response) {
116 auto final_result = chromeos::machine_learning::mojom::FinalResult::New();
117 if (!soda_response.has_recognition_result() ||
118 soda_response.soda_type() != SodaResponse::RECOGNITION ||
119 soda_response.recognition_result().result_type() !=
120 SodaRecognitionResult::FINAL) {
Rob Schonberger828bcff2021-01-13 10:44:01 +1100121 LOG(DFATAL) << "Should only call when there's a final recognition result.";
Rob Schonberger07ee1232020-10-12 17:50:06 +1100122 return final_result;
123 }
124 for (const std::string& hyp :
125 soda_response.recognition_result().hypothesis()) {
126 final_result->final_hypotheses.push_back(hyp);
127 }
Rob Schonbergerd9c4c9d2021-04-12 15:09:48 +1000128 if (soda_response.recognition_result().hypothesis_part_size() > 0) {
129 final_result->hypothesis_part.emplace();
130
131 for (const auto& hypothesis_part :
132 soda_response.recognition_result().hypothesis_part()) {
133 auto part_in_result =
134 chromeos::machine_learning::mojom::HypothesisPartInResult::New();
135 for (const std::string& part : hypothesis_part.text()) {
136 part_in_result->text.push_back(part);
137 }
138 part_in_result->alignment =
139 base::TimeDelta::FromMilliseconds(hypothesis_part.alignment_ms());
140 final_result->hypothesis_part->push_back(std::move(part_in_result));
141 }
142 }
Rob Schonberger07ee1232020-10-12 17:50:06 +1100143 // TODO(robsc): Add endpoint reason when available from
144 final_result->endpoint_reason =
145 chromeos::machine_learning::mojom::EndpointReason::ENDPOINT_UNKNOWN;
146 return final_result;
147}
148
149chromeos::machine_learning::mojom::EndpointerEventPtr EndpointerEventFromProto(
150 const SodaResponse& soda_response) {
151 auto endpointer_event =
152 chromeos::machine_learning::mojom::EndpointerEvent::New();
153 if (!soda_response.has_endpoint_event() ||
154 soda_response.soda_type() != SodaResponse::ENDPOINT) {
155 LOG(DFATAL) << "Shouldn't have been called without an endpoint event.";
156 return endpointer_event;
157 }
158 const auto& soda_endpoint_event = soda_response.endpoint_event();
159 // Set the type, we don't have the timing right here.
160 switch (soda_endpoint_event.endpoint_type()) {
161 case SodaEndpointEvent::START_OF_SPEECH:
162 endpointer_event->endpointer_type = EndpointerType::START_OF_SPEECH;
163 break;
164 case SodaEndpointEvent::END_OF_SPEECH:
165 endpointer_event->endpointer_type = EndpointerType::END_OF_SPEECH;
166 break;
167 case SodaEndpointEvent::END_OF_AUDIO:
168 endpointer_event->endpointer_type = EndpointerType::END_OF_AUDIO;
169 break;
170 case SodaEndpointEvent::END_OF_UTTERANCE:
171 endpointer_event->endpointer_type = EndpointerType::END_OF_UTTERANCE;
172 break;
173 default:
174 LOG(DFATAL) << "Unknown endpointer type.";
175 endpointer_event->endpointer_type = EndpointerType::END_OF_UTTERANCE;
176 break;
177 }
178 return endpointer_event;
179}
180
181} // namespace internal
182} // namespace ml