Rob Schonberger | 07ee123 | 2020-10-12 17:50:06 +1100 | [diff] [blame] | 1 | // Copyright 2020 The Chromium OS Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include <string> |
| 6 | #include <utility> |
| 7 | |
| 8 | #include "ml/soda_proto_mojom_conversion.h" |
| 9 | |
| 10 | using chromeos::machine_learning::mojom::EndpointerType; |
| 11 | using speech::soda::chrome::SodaEndpointEvent; |
| 12 | using speech::soda::chrome::SodaRecognitionResult; |
| 13 | using speech::soda::chrome::SodaResponse; |
| 14 | |
| 15 | namespace ml { |
| 16 | |
| 17 | chromeos::machine_learning::mojom::SpeechRecognizerEventPtr |
| 18 | SpeechRecognizerEventFromProto(const SodaResponse& soda_response) { |
| 19 | auto speech_recognizer_event = |
| 20 | chromeos::machine_learning::mojom::SpeechRecognizerEvent::New(); |
| 21 | if (soda_response.soda_type() == SodaResponse::AUDIO_LEVEL) { |
| 22 | auto audio_level_event = internal::AudioLevelEventFromProto(soda_response); |
| 23 | speech_recognizer_event->set_audio_event(std::move(audio_level_event)); |
| 24 | } else if (soda_response.soda_type() == SodaResponse::RECOGNITION) { |
| 25 | const auto& rec_result = soda_response.recognition_result(); |
| 26 | if (rec_result.result_type() == SodaRecognitionResult::PARTIAL) { |
| 27 | speech_recognizer_event->set_partial_result( |
| 28 | internal::PartialResultFromProto(soda_response)); |
| 29 | } else if (rec_result.result_type() == SodaRecognitionResult::FINAL) { |
| 30 | speech_recognizer_event->set_final_result( |
| 31 | internal::FinalResultFromProto(soda_response)); |
Rob Schonberger | 828bcff | 2021-01-13 10:44:01 +1100 | [diff] [blame] | 32 | } else if (rec_result.result_type() == SodaRecognitionResult::PREFETCH) { |
| 33 | speech_recognizer_event->set_partial_result( |
| 34 | internal::PartialResultFromPrefetchProto(soda_response)); |
Rob Schonberger | 07ee123 | 2020-10-12 17:50:06 +1100 | [diff] [blame] | 35 | } else { |
Rob Schonberger | 828bcff | 2021-01-13 10:44:01 +1100 | [diff] [blame] | 36 | LOG(ERROR) << "Only partial/prefetch/final results are supported, not " |
| 37 | << speech::soda::chrome::SodaRecognitionResult_ResultType_Name( |
| 38 | rec_result.result_type()); |
Rob Schonberger | 07ee123 | 2020-10-12 17:50:06 +1100 | [diff] [blame] | 39 | } |
| 40 | } else if (soda_response.soda_type() == SodaResponse::ENDPOINT) { |
| 41 | speech_recognizer_event->set_endpointer_event( |
| 42 | internal::EndpointerEventFromProto(soda_response)); |
| 43 | } else { |
| 44 | LOG(DFATAL) << "Unexpected type of soda type to convert: " |
| 45 | << speech::soda::chrome::SodaResponse_SodaMessageType_Name( |
| 46 | soda_response.soda_type()); |
| 47 | } |
| 48 | return speech_recognizer_event; |
| 49 | } |
| 50 | |
| 51 | bool IsStopSodaResponse(const SodaResponse& soda_response) { |
| 52 | return soda_response.soda_type() == SodaResponse::STOP; |
| 53 | } |
| 54 | bool IsStartSodaResponse(const SodaResponse& soda_response) { |
| 55 | return soda_response.soda_type() == SodaResponse::START; |
| 56 | } |
| 57 | |
| 58 | bool IsShutdownSodaResponse(const SodaResponse& soda_response) { |
| 59 | return soda_response.soda_type() == SodaResponse::SHUTDOWN; |
| 60 | } |
| 61 | |
| 62 | namespace internal { |
| 63 | chromeos::machine_learning::mojom::AudioLevelEventPtr AudioLevelEventFromProto( |
| 64 | const SodaResponse& soda_response) { |
| 65 | auto audio_level_event = |
| 66 | chromeos::machine_learning::mojom::AudioLevelEvent::New(); |
| 67 | if (!soda_response.has_audio_level_info()) { |
| 68 | LOG(DFATAL) << "Should only call this method if audio level info is set."; |
| 69 | return audio_level_event; |
| 70 | } |
| 71 | const auto& audio_level_info = soda_response.audio_level_info(); |
| 72 | audio_level_event->rms = audio_level_info.rms(); |
| 73 | audio_level_event->audio_level = audio_level_info.audio_level(); |
| 74 | |
| 75 | // TODO(robsc): add support for time here. |
| 76 | return audio_level_event; |
| 77 | } |
| 78 | |
Rob Schonberger | 828bcff | 2021-01-13 10:44:01 +1100 | [diff] [blame] | 79 | chromeos::machine_learning::mojom::PartialResultPtr |
| 80 | PartialResultFromPrefetchProto( |
| 81 | const speech::soda::chrome::SodaResponse& soda_response) { |
| 82 | auto partial_result = chromeos::machine_learning::mojom::PartialResult::New(); |
| 83 | if (!soda_response.has_recognition_result() || |
| 84 | soda_response.soda_type() != SodaResponse::RECOGNITION || |
| 85 | soda_response.recognition_result().result_type() != |
| 86 | SodaRecognitionResult::PREFETCH) { |
| 87 | LOG(DFATAL) << "Should only be called when there's a prefetch result."; |
| 88 | } |
| 89 | for (const std::string& hyp : |
| 90 | soda_response.recognition_result().hypothesis()) { |
| 91 | partial_result->partial_text.push_back(hyp); |
| 92 | } |
| 93 | return partial_result; |
| 94 | } |
| 95 | |
Rob Schonberger | 07ee123 | 2020-10-12 17:50:06 +1100 | [diff] [blame] | 96 | chromeos::machine_learning::mojom::PartialResultPtr PartialResultFromProto( |
| 97 | const SodaResponse& soda_response) { |
| 98 | auto partial_result = chromeos::machine_learning::mojom::PartialResult::New(); |
| 99 | if (!soda_response.has_recognition_result() || |
| 100 | soda_response.soda_type() != SodaResponse::RECOGNITION || |
| 101 | soda_response.recognition_result().result_type() != |
| 102 | SodaRecognitionResult::PARTIAL) { |
| 103 | LOG(DFATAL) |
| 104 | << "Should only call when there's a partial recognition result."; |
| 105 | return partial_result; |
| 106 | } |
| 107 | for (const std::string& hyp : |
| 108 | soda_response.recognition_result().hypothesis()) { |
| 109 | partial_result->partial_text.push_back(hyp); |
| 110 | } |
| 111 | return partial_result; |
| 112 | } |
| 113 | |
| 114 | chromeos::machine_learning::mojom::FinalResultPtr FinalResultFromProto( |
| 115 | const SodaResponse& soda_response) { |
| 116 | auto final_result = chromeos::machine_learning::mojom::FinalResult::New(); |
| 117 | if (!soda_response.has_recognition_result() || |
| 118 | soda_response.soda_type() != SodaResponse::RECOGNITION || |
| 119 | soda_response.recognition_result().result_type() != |
| 120 | SodaRecognitionResult::FINAL) { |
Rob Schonberger | 828bcff | 2021-01-13 10:44:01 +1100 | [diff] [blame] | 121 | LOG(DFATAL) << "Should only call when there's a final recognition result."; |
Rob Schonberger | 07ee123 | 2020-10-12 17:50:06 +1100 | [diff] [blame] | 122 | return final_result; |
| 123 | } |
| 124 | for (const std::string& hyp : |
| 125 | soda_response.recognition_result().hypothesis()) { |
| 126 | final_result->final_hypotheses.push_back(hyp); |
| 127 | } |
Rob Schonberger | d9c4c9d | 2021-04-12 15:09:48 +1000 | [diff] [blame] | 128 | if (soda_response.recognition_result().hypothesis_part_size() > 0) { |
| 129 | final_result->hypothesis_part.emplace(); |
| 130 | |
| 131 | for (const auto& hypothesis_part : |
| 132 | soda_response.recognition_result().hypothesis_part()) { |
| 133 | auto part_in_result = |
| 134 | chromeos::machine_learning::mojom::HypothesisPartInResult::New(); |
| 135 | for (const std::string& part : hypothesis_part.text()) { |
| 136 | part_in_result->text.push_back(part); |
| 137 | } |
| 138 | part_in_result->alignment = |
| 139 | base::TimeDelta::FromMilliseconds(hypothesis_part.alignment_ms()); |
| 140 | final_result->hypothesis_part->push_back(std::move(part_in_result)); |
| 141 | } |
| 142 | } |
Rob Schonberger | 07ee123 | 2020-10-12 17:50:06 +1100 | [diff] [blame] | 143 | // TODO(robsc): Add endpoint reason when available from |
| 144 | final_result->endpoint_reason = |
| 145 | chromeos::machine_learning::mojom::EndpointReason::ENDPOINT_UNKNOWN; |
| 146 | return final_result; |
| 147 | } |
| 148 | |
| 149 | chromeos::machine_learning::mojom::EndpointerEventPtr EndpointerEventFromProto( |
| 150 | const SodaResponse& soda_response) { |
| 151 | auto endpointer_event = |
| 152 | chromeos::machine_learning::mojom::EndpointerEvent::New(); |
| 153 | if (!soda_response.has_endpoint_event() || |
| 154 | soda_response.soda_type() != SodaResponse::ENDPOINT) { |
| 155 | LOG(DFATAL) << "Shouldn't have been called without an endpoint event."; |
| 156 | return endpointer_event; |
| 157 | } |
| 158 | const auto& soda_endpoint_event = soda_response.endpoint_event(); |
| 159 | // Set the type, we don't have the timing right here. |
| 160 | switch (soda_endpoint_event.endpoint_type()) { |
| 161 | case SodaEndpointEvent::START_OF_SPEECH: |
| 162 | endpointer_event->endpointer_type = EndpointerType::START_OF_SPEECH; |
| 163 | break; |
| 164 | case SodaEndpointEvent::END_OF_SPEECH: |
| 165 | endpointer_event->endpointer_type = EndpointerType::END_OF_SPEECH; |
| 166 | break; |
| 167 | case SodaEndpointEvent::END_OF_AUDIO: |
| 168 | endpointer_event->endpointer_type = EndpointerType::END_OF_AUDIO; |
| 169 | break; |
| 170 | case SodaEndpointEvent::END_OF_UTTERANCE: |
| 171 | endpointer_event->endpointer_type = EndpointerType::END_OF_UTTERANCE; |
| 172 | break; |
| 173 | default: |
| 174 | LOG(DFATAL) << "Unknown endpointer type."; |
| 175 | endpointer_event->endpointer_type = EndpointerType::END_OF_UTTERANCE; |
| 176 | break; |
| 177 | } |
| 178 | return endpointer_event; |
| 179 | } |
| 180 | |
| 181 | } // namespace internal |
| 182 | } // namespace ml |