|
| 1 | +import logging |
| 2 | +import uuid |
| 3 | +import base64 |
| 4 | +from typing import Any, Awaitable, Callable, Dict, Iterable, List, Optional, Text |
| 5 | + |
| 6 | +from rasa.core.channels.channel import InputChannel, OutputChannel, UserMessage |
| 7 | +import rasa.shared.utils.io |
| 8 | +from sanic import Blueprint, response |
| 9 | +from sanic.request import Request |
| 10 | +from sanic.response import HTTPResponse |
| 11 | +from socketio import AsyncServer |
| 12 | + |
| 13 | +from urllib.request import urlopen, Request |
| 14 | +from urllib.parse import urlencode |
| 15 | +import json |
| 16 | + |
| 17 | +logger = logging.getLogger(__name__) |
| 18 | + |
| 19 | +class SocketVoiceBlueprint(Blueprint): |
| 20 | + def __init__(self, sio: AsyncServer, socketio_path, *args, **kwargs): |
| 21 | + self.sio = sio |
| 22 | + self.socketio_path = socketio_path |
| 23 | + super().__init__(*args, **kwargs) |
| 24 | + |
| 25 | + def register(self, app, options) -> None: |
| 26 | + self.sio.attach(app, self.socketio_path) |
| 27 | + super().register(app, options) |
| 28 | + |
| 29 | + |
| 30 | +class SocketIOVoiceOutput(OutputChannel): |
| 31 | + @classmethod |
| 32 | + def name(cls) -> Text: |
| 33 | + return "socketiovoice" |
| 34 | + |
| 35 | + def __init__(self, sio: AsyncServer, bot_message_evt: Text, botium_speech_url: Text, botium_speech_apikey: Text, botium_speech_language: Text, botium_speech_voice: Text) -> None: |
| 36 | + self.sio = sio |
| 37 | + self.bot_message_evt = bot_message_evt |
| 38 | + self.botium_speech_url = botium_speech_url |
| 39 | + self.botium_speech_apikey = botium_speech_apikey |
| 40 | + self.botium_speech_language = botium_speech_language |
| 41 | + self.botium_speech_voice = botium_speech_voice |
| 42 | + |
| 43 | + async def _send_message(self, socket_id: Text, response: Any) -> None: |
| 44 | + """Sends a message to the recipient using the bot event.""" |
| 45 | + |
| 46 | + if response.get("text"): |
| 47 | + q = { |
| 48 | + 'text': response['text'] |
| 49 | + } |
| 50 | + if self.botium_speech_voice: |
| 51 | + q['voice'] = self.botium_speech_voice |
| 52 | + |
| 53 | + audioEndpoint = f"{self.botium_speech_url}/api/tts/{self.botium_speech_language}?{urlencode(q)}" |
| 54 | + audio = urlopen(audioEndpoint).read() |
| 55 | + logger.debug(f"_send_message- Calling Speech Endpoint: {audioEndpoint}") |
| 56 | + |
| 57 | + audioBase64 = base64.b64encode(audio).decode('ascii') |
| 58 | + audioUri = "data:audio/wav;base64," + audioBase64 |
| 59 | + response['link'] = audioUri |
| 60 | + |
| 61 | + await self.sio.emit(self.bot_message_evt, response, room=socket_id) |
| 62 | + |
| 63 | + async def send_text_message( |
| 64 | + self, recipient_id: Text, text: Text, **kwargs: Any |
| 65 | + ) -> None: |
| 66 | + """Send a message through this channel.""" |
| 67 | + |
| 68 | + for message_part in text.strip().split("\n\n"): |
| 69 | + await self._send_message(recipient_id, {"text": message_part}) |
| 70 | + |
| 71 | + async def send_image_url( |
| 72 | + self, recipient_id: Text, image: Text, **kwargs: Any |
| 73 | + ) -> None: |
| 74 | + """Sends an image to the output""" |
| 75 | + |
| 76 | + message = {"attachment": {"type": "image", "payload": {"src": image}}} |
| 77 | + await self._send_message(recipient_id, message) |
| 78 | + |
| 79 | + async def send_text_with_buttons( |
| 80 | + self, |
| 81 | + recipient_id: Text, |
| 82 | + text: Text, |
| 83 | + buttons: List[Dict[Text, Any]], |
| 84 | + **kwargs: Any, |
| 85 | + ) -> None: |
| 86 | + """Sends buttons to the output.""" |
| 87 | + |
| 88 | + # split text and create a message for each text fragment |
| 89 | + # the `or` makes sure there is at least one message we can attach the quick |
| 90 | + # replies to |
| 91 | + message_parts = text.strip().split("\n\n") or [text] |
| 92 | + messages = [{"text": message, "quick_replies": []} for message in message_parts] |
| 93 | + |
| 94 | + # attach all buttons to the last text fragment |
| 95 | + for button in buttons: |
| 96 | + messages[-1]["quick_replies"].append( |
| 97 | + { |
| 98 | + "content_type": "text", |
| 99 | + "title": button["title"], |
| 100 | + "payload": button["payload"], |
| 101 | + } |
| 102 | + ) |
| 103 | + |
| 104 | + for message in messages: |
| 105 | + await self._send_message(recipient_id, message) |
| 106 | + |
| 107 | + async def send_elements( |
| 108 | + self, recipient_id: Text, elements: Iterable[Dict[Text, Any]], **kwargs: Any |
| 109 | + ) -> None: |
| 110 | + """Sends elements to the output.""" |
| 111 | + |
| 112 | + for element in elements: |
| 113 | + message = { |
| 114 | + "attachment": { |
| 115 | + "type": "template", |
| 116 | + "payload": {"template_type": "generic", "elements": element}, |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + await self._send_message(recipient_id, message) |
| 121 | + |
| 122 | + async def send_custom_json( |
| 123 | + self, recipient_id: Text, json_message: Dict[Text, Any], **kwargs: Any |
| 124 | + ) -> None: |
| 125 | + """Sends custom json to the output""" |
| 126 | + |
| 127 | + json_message.setdefault("room", recipient_id) |
| 128 | + |
| 129 | + await self.sio.emit(self.bot_message_evt, **json_message) |
| 130 | + |
| 131 | + async def send_attachment( |
| 132 | + self, recipient_id: Text, attachment: Dict[Text, Any], **kwargs: Any |
| 133 | + ) -> None: |
| 134 | + """Sends an attachment to the user.""" |
| 135 | + await self._send_message(recipient_id, {"attachment": attachment}) |
| 136 | + |
| 137 | + |
| 138 | +class SocketIOVoiceInput(InputChannel): |
| 139 | + """A socket.io input channel.""" |
| 140 | + |
| 141 | + @classmethod |
| 142 | + def name(cls) -> Text: |
| 143 | + return "socketiovoice" |
| 144 | + |
| 145 | + @classmethod |
| 146 | + def from_credentials(cls, credentials: Optional[Dict[Text, Any]]) -> InputChannel: |
| 147 | + credentials = credentials or {} |
| 148 | + return cls( |
| 149 | + credentials.get("user_message_evt", "user_uttered"), |
| 150 | + credentials.get("bot_message_evt", "bot_uttered"), |
| 151 | + credentials.get("namespace"), |
| 152 | + credentials.get("session_persistence", False), |
| 153 | + credentials.get("socketio_path", "/socket.io"), |
| 154 | + credentials.get("botium_speech_url"), |
| 155 | + credentials.get("botium_speech_apikey"), |
| 156 | + credentials.get("botium_speech_language", "en"), |
| 157 | + credentials.get("botium_speech_voice"), |
| 158 | + ) |
| 159 | + |
| 160 | + def __init__( |
| 161 | + self, |
| 162 | + user_message_evt: Text = "user_uttered", |
| 163 | + bot_message_evt: Text = "bot_uttered", |
| 164 | + namespace: Optional[Text] = None, |
| 165 | + session_persistence: bool = False, |
| 166 | + socketio_path: Optional[Text] = "/socket.io", |
| 167 | + botium_speech_url: Text = None, |
| 168 | + botium_speech_apikey: Optional[Text] = None, |
| 169 | + botium_speech_language: Text = "en", |
| 170 | + botium_speech_voice: Optional[Text] = False, |
| 171 | + ): |
| 172 | + self.bot_message_evt = bot_message_evt |
| 173 | + self.session_persistence = session_persistence |
| 174 | + self.user_message_evt = user_message_evt |
| 175 | + self.namespace = namespace |
| 176 | + self.socketio_path = socketio_path |
| 177 | + self.botium_speech_url = botium_speech_url |
| 178 | + self.botium_speech_apikey = botium_speech_apikey |
| 179 | + self.botium_speech_language = botium_speech_language |
| 180 | + self.botium_speech_voice = botium_speech_voice |
| 181 | + self.sio = None |
| 182 | + |
| 183 | + def get_output_channel(self) -> Optional["OutputChannel"]: |
| 184 | + if self.sio is None: |
| 185 | + rasa.shared.utils.io.raise_warning( |
| 186 | + "SocketIO output channel cannot be recreated. " |
| 187 | + "This is expected behavior when using multiple Sanic " |
| 188 | + "workers or multiple Rasa Open Source instances. " |
| 189 | + "Please use a different channel for external events in these " |
| 190 | + "scenarios." |
| 191 | + ) |
| 192 | + return |
| 193 | + return SocketIOVoiceOutput(self.sio, self.bot_message_evt, self.botium_speech_url, self.botium_speech_apikey, self.botium_speech_language, self.botium_speech_voice) |
| 194 | + |
| 195 | + def blueprint( |
| 196 | + self, on_new_message: Callable[[UserMessage], Awaitable[Any]] |
| 197 | + ) -> Blueprint: |
| 198 | + # Workaround so that socketio works with requests from other origins. |
| 199 | + # https://github.com/miguelgrinberg/python-socketio/issues/205#issuecomment-493769183 |
| 200 | + sio = AsyncServer(async_mode="sanic", cors_allowed_origins=[]) |
| 201 | + socketio_webhook = SocketVoiceBlueprint( |
| 202 | + sio, self.socketio_path, "socketio_webhook", __name__ |
| 203 | + ) |
| 204 | + |
| 205 | + # make sio object static to use in get_output_channel |
| 206 | + self.sio = sio |
| 207 | + |
| 208 | + @socketio_webhook.route("/", methods=["GET"]) |
| 209 | + async def health(_: Request) -> HTTPResponse: |
| 210 | + return response.json({"status": "ok"}) |
| 211 | + |
| 212 | + @sio.on("connect", namespace=self.namespace) |
| 213 | + async def connect(sid: Text, _) -> None: |
| 214 | + logger.debug(f"User {sid} connected to socketIO endpoint.") |
| 215 | + |
| 216 | + @sio.on("disconnect", namespace=self.namespace) |
| 217 | + async def disconnect(sid: Text) -> None: |
| 218 | + logger.debug(f"User {sid} disconnected from socketIO endpoint.") |
| 219 | + |
| 220 | + @sio.on("session_request", namespace=self.namespace) |
| 221 | + async def session_request(sid: Text, data: Optional[Dict]): |
| 222 | + if data is None: |
| 223 | + data = {} |
| 224 | + if "session_id" not in data or data["session_id"] is None: |
| 225 | + data["session_id"] = uuid.uuid4().hex |
| 226 | + if self.session_persistence: |
| 227 | + sio.enter_room(sid, data["session_id"]) |
| 228 | + await sio.emit("session_confirm", data["session_id"], room=sid) |
| 229 | + logger.debug(f"User {sid} connected to socketIO endpoint.") |
| 230 | + |
| 231 | + @sio.on(self.user_message_evt, namespace=self.namespace) |
| 232 | + async def handle_message(sid: Text, data: Dict) -> Any: |
| 233 | + output_channel = SocketIOVoiceOutput(sio, self.bot_message_evt, self.botium_speech_url, self.botium_speech_apikey, self.botium_speech_language, self.botium_speech_voice) |
| 234 | + |
| 235 | + if self.session_persistence: |
| 236 | + if not data.get("session_id"): |
| 237 | + rasa.shared.utils.io.raise_warning( |
| 238 | + "A message without a valid session_id " |
| 239 | + "was received. This message will be " |
| 240 | + "ignored. Make sure to set a proper " |
| 241 | + "session id using the " |
| 242 | + "`session_request` socketIO event." |
| 243 | + ) |
| 244 | + return |
| 245 | + sender_id = data["session_id"] |
| 246 | + else: |
| 247 | + sender_id = sid |
| 248 | + |
| 249 | + if data['message'] and data['message'].startswith('data:'): |
| 250 | + header, encoded = data['message'].split(",", 1) |
| 251 | + |
| 252 | + audioData = base64.b64decode(encoded.encode('ascii')) |
| 253 | + |
| 254 | + convertEndpoint = f"{self.botium_speech_url}/api/convert/WAVTOMONOWAV" |
| 255 | + logger.debug(f"handle_message - Calling Convert Endpoint: {convertEndpoint}") |
| 256 | + res = urlopen(Request(url=convertEndpoint, data=audioData, method='POST', headers= { 'content-type': 'audio/wav' })) |
| 257 | + audioDataWav = res.read() |
| 258 | + |
| 259 | + #with open('decoded_image.wav', 'wb') as file_to_save: |
| 260 | + # file_to_save.write(audioData) |
| 261 | + |
| 262 | + audioEndpoint = f"{self.botium_speech_url}/api/stt/{self.botium_speech_language}" |
| 263 | + logger.debug(f"handle_message - Calling Speech Endpoint: {audioEndpoint}") |
| 264 | + res = urlopen(Request(url=audioEndpoint, data=audioDataWav, method='POST', headers= { 'content-type': 'audio/wav' })) |
| 265 | + resJson = json.loads(res.read().decode('utf-8')) |
| 266 | + logger.debug(f"handle_message - Calling Speech Endpoint: {audioEndpoint} => {resJson}") |
| 267 | + message = resJson["text"] |
| 268 | + |
| 269 | + await sio.emit(self.user_message_evt, {"text": message}, room=sid) |
| 270 | + else: |
| 271 | + message = data['message'] |
| 272 | + |
| 273 | + message = UserMessage( |
| 274 | + message, output_channel, sender_id, input_channel=self.name() |
| 275 | + ) |
| 276 | + await on_new_message(message) |
| 277 | + |
| 278 | + return socketio_webhook |
0 commit comments