Refactor voice control core and robot behavior
This commit is contained in:
@@ -1,178 +1,106 @@
|
||||
# Dora IOBridge Node
|
||||
|
||||
A WebSocket server that bridges web clients with the Dora dataflow for real-time voice commands and scene updates.
|
||||
Generic WebSocket bridge between web clients and Dora dataflow.
|
||||
|
||||
## Inputs/Outputs
|
||||
## Dora Outputs (WebSocket → Dora)
|
||||
|
||||
| Input | Type | Description |
|
||||
|----------------|--------|---------------------------------------|
|
||||
| `voice_out` | JSON | Response from voice control node |
|
||||
| `scene_update` | JSON | Scene objects from voice control |
|
||||
| Output | Type | Description |
|
||||
|-------------|--------|--------------------------|
|
||||
| `text_out` | string | Text from clients |
|
||||
| `audio_out` | bytes | WAV audio from clients |
|
||||
| `data_out` | JSON | Generic data from clients|
|
||||
|
||||
| Output | Type | Description |
|
||||
|----------------|--------|---------------------------------------|
|
||||
| `voice_in` | string | Voice commands forwarded to Dora |
|
||||
## Dora Inputs (Dora → WebSocket)
|
||||
|
||||
| Input | Type | Description |
|
||||
|-------------|--------|--------------------------|
|
||||
| `text_in` | string | Text to broadcast |
|
||||
| `audio_in` | bytes | WAV audio to broadcast |
|
||||
| `data_in` | JSON | Generic data to broadcast|
|
||||
|
||||
## Environment Variables
|
||||
|
||||
```bash
|
||||
VOICE_HOST=0.0.0.0 # Bind address
|
||||
VOICE_PORT=8765 # Listen port
|
||||
| Variable | Default | Description |
|
||||
|-----------|-----------|--------------|
|
||||
| `WS_HOST` | `0.0.0.0` | Bind address |
|
||||
| `WS_PORT` | `8765` | Listen port |
|
||||
|
||||
## WebSocket Endpoint
|
||||
|
||||
```text
|
||||
ws://{WS_HOST}:{WS_PORT}
|
||||
```
|
||||
|
||||
## Installation
|
||||
Default: `ws://0.0.0.0:8765`
|
||||
|
||||
```bash
|
||||
cd dora_iobridge
|
||||
pip install -e .
|
||||
## WebSocket Protocol
|
||||
|
||||
### Client → Server
|
||||
|
||||
| Type | Field | Description |
|
||||
|---------|-----------|-----------------------|
|
||||
| `text` | `content` | Text string |
|
||||
| `audio` | `content` | Base64-encoded WAV |
|
||||
| `data` | `payload` | Any JSON object |
|
||||
| `ping` | - | Health check |
|
||||
|
||||
Examples:
|
||||
|
||||
```json
|
||||
{"type": "text", "content": "hello world"}
|
||||
{"type": "audio", "content": "UklGRi4AAABXQVZFZm10..."}
|
||||
{"type": "data", "payload": {"key": "value"}}
|
||||
{"type": "ping"}
|
||||
```
|
||||
|
||||
### Server → Client
|
||||
|
||||
| Type | Field | Description |
|
||||
|---------|-----------|-----------------------|
|
||||
| `text` | `content` | Text string |
|
||||
| `audio` | `content` | Base64-encoded WAV |
|
||||
| `data` | `payload` | Any JSON object |
|
||||
| `pong` | - | Response to ping |
|
||||
| `error` | `message` | Error description |
|
||||
|
||||
Examples:
|
||||
|
||||
```json
|
||||
{"type": "text", "content": "response text"}
|
||||
{"type": "audio", "content": "UklGRi4A...", "format": "wav"}
|
||||
{"type": "data", "payload": {"objects": [...]}}
|
||||
{"type": "pong"}
|
||||
{"type": "error", "message": "Invalid JSON"}
|
||||
```
|
||||
|
||||
## Dataflow Example
|
||||
|
||||
```yaml
|
||||
- id: iobridge
|
||||
build: uv pip install -e dora_iobridge
|
||||
path: dora_iobridge/dora_iobridge/main.py
|
||||
env:
|
||||
WS_HOST: "0.0.0.0"
|
||||
WS_PORT: "8765"
|
||||
inputs:
|
||||
text_in: voice/voice_out
|
||||
data_in: voice/scene_update
|
||||
outputs:
|
||||
- text_out
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
### Test with WebSocket (wscat)
|
||||
|
||||
```bash
|
||||
# Install wscat
|
||||
npm install -g wscat
|
||||
|
||||
# Connect to the server
|
||||
wscat -c ws://localhost:8765
|
||||
```
|
||||
|
||||
### Test with curl (websocat)
|
||||
|
||||
```bash
|
||||
# Install websocat
|
||||
# Ubuntu: sudo apt install websocat
|
||||
# macOS: brew install websocat
|
||||
sudo apt install websocat
|
||||
|
||||
# Send a ping
|
||||
echo '{"type": "ping"}' | websocat ws://localhost:8765
|
||||
# Response: {"type": "pong"}
|
||||
# Connect
|
||||
websocat ws://localhost:8765
|
||||
|
||||
# Send a voice command
|
||||
echo '{"type": "command", "text": "sube"}' | websocat ws://localhost:8765
|
||||
# Send text
|
||||
{"type": "text", "content": "hello"}
|
||||
|
||||
# Request scene refresh
|
||||
echo '{"type": "scene_refresh"}' | websocat ws://localhost:8765
|
||||
```
|
||||
|
||||
### Test with Python
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
import websockets
|
||||
import json
|
||||
|
||||
async def test_iobridge():
|
||||
uri = "ws://localhost:8765"
|
||||
async with websockets.connect(uri) as ws:
|
||||
# Test ping
|
||||
await ws.send(json.dumps({"type": "ping"}))
|
||||
response = await ws.recv()
|
||||
print(f"Ping response: {response}")
|
||||
|
||||
# Send command
|
||||
await ws.send(json.dumps({
|
||||
"type": "command",
|
||||
"text": "agarra el cubo rojo"
|
||||
}))
|
||||
|
||||
# Listen for responses
|
||||
async for message in ws:
|
||||
data = json.loads(message)
|
||||
print(f"Received: {data}")
|
||||
|
||||
asyncio.run(test_iobridge())
|
||||
```
|
||||
|
||||
### Test with curl (HTTP upgrade not supported directly)
|
||||
|
||||
Since WebSocket requires an upgrade handshake, use this shell script:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# test_iobridge.sh
|
||||
|
||||
# Using websocat for interactive testing
|
||||
websocat ws://localhost:8765 <<EOF
|
||||
{"type": "ping"}
|
||||
{"type": "command", "text": "sube"}
|
||||
{"type": "scene_refresh"}
|
||||
EOF
|
||||
```
|
||||
|
||||
## WebSocket Message Types
|
||||
|
||||
### Client -> Server
|
||||
|
||||
**Command (voice input)**
|
||||
```json
|
||||
{"type": "command", "text": "agarra el cubo rojo"}
|
||||
```
|
||||
|
||||
**Ping (health check)**
|
||||
```json
|
||||
# Ping
|
||||
{"type": "ping"}
|
||||
```
|
||||
Response: `{"type": "pong"}`
|
||||
|
||||
**Scene Refresh**
|
||||
```json
|
||||
{"type": "scene_refresh"}
|
||||
```
|
||||
|
||||
### Server -> Client (Broadcasts)
|
||||
|
||||
**Command Response**
|
||||
```json
|
||||
{
|
||||
"type": "response",
|
||||
"text": "Ok, voy a tomar",
|
||||
"status": "ok"
|
||||
}
|
||||
```
|
||||
|
||||
**Scene Update**
|
||||
```json
|
||||
{
|
||||
"type": "scene_updated",
|
||||
"objects": [
|
||||
{
|
||||
"object_type": "cubo",
|
||||
"color": "rojo",
|
||||
"size": "big",
|
||||
"position_mm": [150.0, 200.0, 280.0],
|
||||
"source": "detection"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Dora Dataflow Configuration
|
||||
|
||||
```yaml
|
||||
nodes:
|
||||
- id: iobridge
|
||||
build: pip install -e ./dora_iobridge
|
||||
path: dora_iobridge
|
||||
inputs:
|
||||
voice_out: voice_control/voice_out
|
||||
scene_update: voice_control/scene_update
|
||||
outputs:
|
||||
- voice_in
|
||||
env:
|
||||
VOICE_HOST: "0.0.0.0"
|
||||
VOICE_PORT: "8765"
|
||||
```
|
||||
|
||||
```bash
|
||||
dora up
|
||||
dora start dataflow.yml
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
|
||||
- dora-rs >= 0.3.9
|
||||
- pyarrow >= 12.0.0
|
||||
- websockets >= 12.0
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
"""Dora node bridging WebSocket IO to Dora topics."""
|
||||
"""Dora node bridging WebSocket IO to Dora topics.
|
||||
|
||||
Generic I/O bridge with fixed endpoints:
|
||||
- Outputs (WS → Dora): text_out, audio_out, data_out
|
||||
- Inputs (Dora → WS): text_in, audio_in, data_in
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
@@ -15,129 +21,164 @@ from websockets.server import serve, WebSocketServerProtocol
|
||||
|
||||
|
||||
class IoBridgeServer:
|
||||
"""WebSocket server that bridges clients to Dora dataflow."""
|
||||
|
||||
def __init__(self, host: str, port: int):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.clients: Set[WebSocketServerProtocol] = set()
|
||||
self.command_handler = None
|
||||
self.scene_refresh_handler = None
|
||||
# Callbacks for sending to Dora
|
||||
self.on_text: Optional[callable] = None
|
||||
self.on_audio: Optional[callable] = None
|
||||
self.on_data: Optional[callable] = None
|
||||
|
||||
async def handler(self, websocket: WebSocketServerProtocol):
|
||||
"""Handle WebSocket connection."""
|
||||
self.clients.add(websocket)
|
||||
try:
|
||||
async for message in websocket:
|
||||
try:
|
||||
data = json.loads(message)
|
||||
except json.JSONDecodeError:
|
||||
await websocket.send(
|
||||
json.dumps({"type": "error", "text": "Invalid JSON message"})
|
||||
)
|
||||
await websocket.send(json.dumps({
|
||||
"type": "error",
|
||||
"message": "Invalid JSON"
|
||||
}))
|
||||
continue
|
||||
response = await self._route_message(data, websocket)
|
||||
response = await self._route_message(data)
|
||||
if response:
|
||||
await websocket.send(json.dumps(response))
|
||||
finally:
|
||||
self.clients.discard(websocket)
|
||||
|
||||
async def _route_message(
|
||||
self, data: Dict[str, Any], websocket: WebSocketServerProtocol
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
async def _route_message(self, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Route incoming message to appropriate handler."""
|
||||
msg_type = data.get("type")
|
||||
if msg_type == "command":
|
||||
text = data.get("text", "")
|
||||
if self.command_handler:
|
||||
await self.command_handler(text)
|
||||
return None
|
||||
return {"type": "error", "text": "No command handler registered"}
|
||||
|
||||
if msg_type == "ping":
|
||||
return {"type": "pong"}
|
||||
if msg_type == "scene_refresh":
|
||||
if self.scene_refresh_handler:
|
||||
objects = await self.scene_refresh_handler()
|
||||
return {"type": "scene_updated", "objects": objects}
|
||||
return {"type": "error", "text": "No scene handler registered"}
|
||||
return {"type": "error", "text": f"Unknown message type: {msg_type}"}
|
||||
|
||||
if msg_type == "text":
|
||||
content = data.get("content", "")
|
||||
if self.on_text and content:
|
||||
self.on_text(content)
|
||||
return None
|
||||
|
||||
if msg_type == "audio":
|
||||
content = data.get("content", "") # base64 WAV
|
||||
if self.on_audio and content:
|
||||
self.on_audio(content)
|
||||
return None
|
||||
|
||||
if msg_type == "data":
|
||||
payload = data.get("payload", {})
|
||||
if self.on_data:
|
||||
self.on_data(payload)
|
||||
return None
|
||||
|
||||
return {"type": "error", "message": f"Unknown type: {msg_type}"}
|
||||
|
||||
async def broadcast(self, message: Dict[str, Any]):
|
||||
"""Broadcast message to all connected clients."""
|
||||
if not self.clients:
|
||||
return
|
||||
payload = json.dumps(message)
|
||||
await asyncio.gather(
|
||||
*[client.send(payload) for client in self.clients], return_exceptions=True
|
||||
*[client.send(payload) for client in self.clients],
|
||||
return_exceptions=True
|
||||
)
|
||||
|
||||
async def send(self, message: Dict[str, Any], websocket: WebSocketServerProtocol):
|
||||
await websocket.send(json.dumps(message))
|
||||
|
||||
async def start(self):
|
||||
"""Start the WebSocket server."""
|
||||
async with serve(self.handler, self.host, self.port):
|
||||
await asyncio.Future()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
host = os.getenv("VOICE_HOST", "0.0.0.0")
|
||||
port = int(os.getenv("VOICE_PORT", "8765"))
|
||||
input_topic = os.getenv("VOICE_IN_OUTPUT", "voice_in")
|
||||
response_input = os.getenv("VOICE_OUT_INPUT", "voice_out")
|
||||
scene_input = os.getenv("SCENE_INPUT", "scene_update")
|
||||
"""Main entry point."""
|
||||
host = os.getenv("WS_HOST", "0.0.0.0")
|
||||
port = int(os.getenv("WS_PORT", "9000"))
|
||||
|
||||
node = Node()
|
||||
server = IoBridgeServer(host, port)
|
||||
loop = asyncio.new_event_loop()
|
||||
|
||||
def push_command(text: str) -> None:
|
||||
# Callbacks: WebSocket → Dora
|
||||
def send_text(content: str) -> None:
|
||||
node.send_output(
|
||||
input_topic,
|
||||
pa.array([text]),
|
||||
metadata={"encoding": "utf-8", "timestamp_ns": time.time_ns()},
|
||||
"text_out",
|
||||
pa.array([content]),
|
||||
{"encoding": "utf-8", "timestamp_ns": time.time_ns()},
|
||||
)
|
||||
|
||||
async def handle_scene_refresh():
|
||||
return []
|
||||
def send_audio(content_b64: str) -> None:
|
||||
audio_bytes = base64.b64decode(content_b64)
|
||||
node.send_output(
|
||||
"audio_out",
|
||||
pa.array([audio_bytes]),
|
||||
{"encoding": "wav", "timestamp_ns": time.time_ns()},
|
||||
)
|
||||
|
||||
def command_handler(text: str):
|
||||
push_command(text)
|
||||
return None
|
||||
def send_data(payload: Dict[str, Any]) -> None:
|
||||
node.send_output(
|
||||
"data_out",
|
||||
pa.array([json.dumps(payload)]),
|
||||
{"encoding": "json", "timestamp_ns": time.time_ns()},
|
||||
)
|
||||
|
||||
server.command_handler = command_handler
|
||||
server.scene_refresh_handler = handle_scene_refresh
|
||||
server.on_text = send_text
|
||||
server.on_audio = send_audio
|
||||
server.on_data = send_data
|
||||
|
||||
# Start WebSocket server in background thread
|
||||
def run_server():
|
||||
asyncio.set_event_loop(loop)
|
||||
loop.run_until_complete(server.start())
|
||||
|
||||
threading.Thread(target=run_server, daemon=True).start()
|
||||
|
||||
timestamp = time.strftime("%H:%M:%S")
|
||||
print(f"[iobridge {timestamp}] WebSocket server at ws://{host}:{port}", flush=True)
|
||||
|
||||
# Dora event loop: Dora → WebSocket
|
||||
for event in node:
|
||||
if event["type"] != "INPUT":
|
||||
continue
|
||||
|
||||
if event["id"] == response_input:
|
||||
raw = event["value"][0].as_py() if len(event["value"]) else ""
|
||||
if not raw:
|
||||
continue
|
||||
try:
|
||||
payload = json.loads(raw)
|
||||
message = {
|
||||
"type": "response",
|
||||
"text": payload.get("text", ""),
|
||||
"status": payload.get("status", "ok"),
|
||||
}
|
||||
except Exception:
|
||||
message = {"type": "response", "text": raw, "status": "ok"}
|
||||
asyncio.run_coroutine_threadsafe(server.broadcast(message), loop)
|
||||
event_id = event["id"]
|
||||
raw = event["value"][0].as_py() if len(event["value"]) else None
|
||||
if raw is None:
|
||||
continue
|
||||
|
||||
if event["id"] == scene_input:
|
||||
raw = event["value"][0].as_py() if len(event["value"]) else ""
|
||||
if not raw:
|
||||
continue
|
||||
if event_id == "text_in":
|
||||
# Text from Dora to broadcast
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
# Try to parse as JSON for structured response
|
||||
payload = json.loads(raw)
|
||||
message = {"type": "text", "content": payload.get("text", raw)}
|
||||
except json.JSONDecodeError:
|
||||
message = {"type": "text", "content": raw}
|
||||
else:
|
||||
message = {"type": "text", "content": str(raw)}
|
||||
asyncio.run_coroutine_threadsafe(server.broadcast(message), loop)
|
||||
|
||||
elif event_id == "audio_in":
|
||||
# Audio from Dora to broadcast (binary → base64)
|
||||
if isinstance(raw, bytes):
|
||||
content_b64 = base64.b64encode(raw).decode("utf-8")
|
||||
else:
|
||||
content_b64 = raw
|
||||
message = {"type": "audio", "content": content_b64, "format": "wav"}
|
||||
asyncio.run_coroutine_threadsafe(server.broadcast(message), loop)
|
||||
|
||||
elif event_id == "data_in":
|
||||
# Generic JSON data from Dora to broadcast
|
||||
try:
|
||||
payload = json.loads(raw)
|
||||
objects = payload.get("objects", [])
|
||||
message = {"type": "scene_updated", "objects": objects}
|
||||
except Exception:
|
||||
message = {"type": "scene_updated", "objects": []}
|
||||
payload = json.loads(raw) if isinstance(raw, str) else raw
|
||||
except json.JSONDecodeError:
|
||||
payload = {"raw": raw}
|
||||
message = {"type": "data", "payload": payload}
|
||||
asyncio.run_coroutine_threadsafe(server.broadcast(message), loop)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user