Add voice control; working, but needs more work
This commit is contained in:
22
config.toml
Normal file
22
config.toml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Vision / pick-and-place configuration shared by the detector and voice nodes.

[roi]
# Image-space region of interest, (x, y) pixel corners.
roi_top_left = [500, 230]
roi_bottom_right = [775, 510]

[bucket_positions]
# Per-color drop-off positions. Units/frame not stated here — presumably
# robot-base XY in mm; confirm against the voice-control node.
blue_bucket_pos = [400, 90]
red_bucket_pos = [550, 90]
yellow_bucket_pos = [550, 630]
white_bucket_pos = [400, 630]

[dominant_colors]
# Reference colors in BGR channel order (blue == [255, 0, 0]), matching the
# OpenCV BGR frames the detector samples.
blue = [255, 0, 0]
red = [0, 0, 255]
yellow = [0, 255, 255]
white = [255, 255, 255]

[object_parameters]
# Pixel-area threshold separating "big" from "small" detections.
size_threshold = 4200
# Heights in mm — presumably per-size pick heights; confirm against consumers.
big_height = 125.9
small_height = 106.0
bottom_height = 68.0
normal_height = 220.0
|
||||||
133
dataflow_voice_control_ulite6_zed.yml
Normal file
133
dataflow_voice_control_ulite6_zed.yml
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
# Dora dataflow: ZED camera -> YOLO detector -> voice control -> uLite6 arm,
# with a WebSocket IO bridge for browser clients.
nodes:
  # ZED stereo camera driver (C++), publishes BGR frames and a point cloud.
  - id: zed_camera_cpp
    build: bash -lc "cmake -S dora_zed_cpp -B dora_zed_cpp/build && cmake --build dora_zed_cpp/build"
    path: dora_zed_cpp/build/dora_zed_cpp
    env:
      ZED_RESOLUTION: "720"
      ZED_FPS: "15"
      ZED_DEPTH_MODE: "NEURAL"
      ZED_DEPTH_MIN_MM: "10"
      ZED_DEPTH_MAX_MM: "600"
      ZED_DEPTH_FILL: "false"
      ZED_FLIP: "ON"
      ZED_WARMUP_FRAMES: "30"
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image_bgr
      - camera_info
      - point_cloud

  # uLite6 robot-arm driver; executes commands from the voice node.
  - id: ulite6
    build: uv pip install -e dora_ulite6
    path: dora_ulite6/dora_ulite6/main.py
    inputs:
      tick: dora/timer/millis/10
      command: voice/robot_cmd
    outputs:
      - tcp_pose
      - status
    env:
      ROBOT_IP: "192.168.1.192"
      DEFAULT_SPEED: "30"
      DEFAULT_UNITS: "mm"
      API_HOST: "0.0.0.0"
      API_PORT: "9000"
      VACUUM_ENABLED: "true"
      # Initial position on startup: "home", "pose", or "none"
      # Set to "none" - voice control handles initial positioning
      INIT_MODE: "none"

  # WebSocket bridge between web clients and the dataflow.
  - id: iobridge
    build: |
      uv venv -p 3.12 --seed --allow-existing
      uv pip install -e dora_iobridge
    path: dora_iobridge/dora_iobridge/main.py
    env:
      VIRTUAL_ENV: ./.venv
      VOICE_HOST: "0.0.0.0"
      VOICE_PORT: "8765"
      VOICE_IN_OUTPUT: "voice_in"
      VOICE_OUT_INPUT: "voice_out"
      SCENE_INPUT: "scene_update"
    inputs:
      voice_out: voice/voice_out
      scene_update: voice/scene_update
      tick: dora/timer/millis/100
    outputs:
      - voice_in

  # YOLO detector: localizes objects in the robot base frame.
  - id: detector
    build: |
      uv venv -p 3.12 --seed --allow-existing
      uv pip install -e dora_detector
    path: dora_detector/dora_detector/main.py
    env:
      VIRTUAL_ENV: ./.venv
      IMAGE_INPUT: "image_bgr"
      POINT_CLOUD_INPUT: "point_cloud"
      POSE_INPUT: "tcp_pose"
      OBJECTS_OUTPUT: "objects"
      IMAGE_OUTPUT: "image_annotated"
      CALIBRATION_FILE: "calibration_ulite6_zed.npz"
      DETECTOR_WEIGHTS: "trained_models/yolo8n.pt"
      CONFIG_FILE: "config.toml"
      ROI_TOP_LEFT: "500,230"
      ROI_BOTTOM_RIGHT: "775,510"
      SIZE_THRESHOLD: "4200"
      DETECT_EVERY_N: "3"
      MIN_DEPTH_MM: "10"
      MAX_DEPTH_MM: "600"
    inputs:
      image_bgr: zed_camera_cpp/image_bgr
      point_cloud: zed_camera_cpp/point_cloud
      tcp_pose: ulite6/tcp_pose
      tick: dora/timer/millis/100
    outputs:
      - objects
      - image_annotated

  # Voice-control brain: interprets commands, plans motions, tracks the scene.
  - id: voice
    build: |
      uv venv -p 3.12 --seed --allow-existing
      uv pip install -e dora_voice_control
    path: dora_voice_control/dora_voice_control/main.py
    env:
      VIRTUAL_ENV: ./.venv
      OBJECTS_INPUT: "objects"
      POSE_INPUT: "tcp_pose"
      STATUS_INPUT: "status"
      COMMAND_OUTPUT: "robot_cmd"
      CONFIG_FILE: "config.toml"
      # Map Spanish command names to detector class names
      CLASS_MAP: '{"cilindro": "cylinder", "cubo": "cube", "estrella": "star", "caja": "box", "amarillo": "yellow", "rojo": "red", "azul": "blue", "blanco": "white", "grande": "big", "pequeno": "small"}'
      VOICE_IN_INPUT: "voice_in"
      VOICE_OUT_OUTPUT: "voice_out"
      SCENE_OUTPUT: "scene_update"
      TCP_OFFSET_MM: "63.0"
      APPROACH_OFFSET_MM: "50.0"
      STEP_MM: "20.0"
      DEFAULT_ROLL: "180.0"
      DEFAULT_PITCH: "0.0"
      DEFAULT_YAW: "0.0"
      DRY_RUN: "false"
      # Initial position (used on startup and reset command)
      INIT_ON_START: "true"
      INIT_X: "300.0"
      INIT_Y: "0.0"
      INIT_Z: "350.0"
      INIT_ROLL: "180.0"
      INIT_PITCH: "0.0"
      INIT_YAW: "0.0"
      IMAGE_INPUT: "image_annotated"
      IMAGE_WIDTH: "1280"
      IMAGE_HEIGHT: "720"
      API_ENABLED: "true"
      API_PORT: "8080"
    inputs:
      objects: detector/objects
      tcp_pose: ulite6/tcp_pose
      status: ulite6/status
      voice_in: iobridge/voice_in
      image_annotated: detector/image_annotated
      tick: dora/timer/millis/100
    outputs:
      - robot_cmd
      - voice_out
      - scene_update
|
||||||
1
dora_detector/dora_detector/__init__.py
Normal file
1
dora_detector/dora_detector/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Dora detector node package."""
|
||||||
513
dora_detector/dora_detector/main.py
Normal file
513
dora_detector/dora_detector/main.py
Normal file
@@ -0,0 +1,513 @@
|
|||||||
|
"""Dora node for YOLO detection and base-frame object localization."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
|
from dora import Node
|
||||||
|
from ultralytics import YOLO
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tomllib
|
||||||
|
except ModuleNotFoundError: # pragma: no cover
|
||||||
|
import tomli as tomllib
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_WEIGHTS = os.path.join(os.getcwd(), "trained_models", "yolo8n.pt")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DetectionConfig:
    """Runtime configuration for detection, ROI filtering and color naming."""

    # YOLO inference parameters (input size, confidence and NMS IoU thresholds).
    imgsz: int
    conf: float
    iou: float
    # Pixel-area threshold separating "big" from "small" detections.
    size_threshold: int
    # Image-space region of interest, (x, y) pixel corners.
    roi_top_left: Tuple[int, int]
    roi_bottom_right: Tuple[int, int]
    use_roi: bool
    # Run detection only on every N-th received frame.
    detect_every_n: int
    # Valid depth range (mm) for point-cloud samples.
    min_depth_mm: float
    max_depth_mm: float
    # Reference colors, BGR order (frames are decoded as BGR; blue=(255,0,0)).
    color_blue: Tuple[int, int, int]
    color_red: Tuple[int, int, int]
    color_yellow: Tuple[int, int, int]
    color_white: Tuple[int, int, int]
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_int_pair(raw: str, default: Tuple[int, int]) -> Tuple[int, int]:
|
||||||
|
try:
|
||||||
|
parts = [p.strip() for p in raw.split(",")]
|
||||||
|
if len(parts) >= 2:
|
||||||
|
return int(parts[0]), int(parts[1])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_float_pair(raw: str, default: Tuple[float, float]) -> Tuple[float, float]:
|
||||||
|
try:
|
||||||
|
parts = [p.strip() for p in raw.split(",")]
|
||||||
|
if len(parts) >= 2:
|
||||||
|
return float(parts[0]), float(parts[1])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_color(raw: str, default: Tuple[int, int, int]) -> Tuple[int, int, int]:
|
||||||
|
try:
|
||||||
|
parts = [p.strip() for p in raw.split(",")]
|
||||||
|
if len(parts) >= 3:
|
||||||
|
return int(parts[0]), int(parts[1]), int(parts[2])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _rotation_matrix_xyz(
|
||||||
|
roll_deg: float, pitch_deg: float, yaw_deg: float
|
||||||
|
) -> np.ndarray:
|
||||||
|
roll = np.deg2rad(roll_deg)
|
||||||
|
pitch = np.deg2rad(pitch_deg)
|
||||||
|
yaw = np.deg2rad(yaw_deg)
|
||||||
|
cx, sx = np.cos(roll), np.sin(roll)
|
||||||
|
cy, sy = np.cos(pitch), np.sin(pitch)
|
||||||
|
cz, sz = np.cos(yaw), np.sin(yaw)
|
||||||
|
rot_x = np.array([[1.0, 0.0, 0.0], [0.0, cx, -sx], [0.0, sx, cx]])
|
||||||
|
rot_y = np.array([[cy, 0.0, sy], [0.0, 1.0, 0.0], [-sy, 0.0, cy]])
|
||||||
|
rot_z = np.array([[cz, -sz, 0.0], [sz, cz, 0.0], [0.0, 0.0, 1.0]])
|
||||||
|
return rot_z @ rot_y @ rot_x
|
||||||
|
|
||||||
|
|
||||||
|
def _pose_to_matrix(tcp_pose_mm_deg: List[float]) -> np.ndarray:
    """Convert an [x, y, z, roll, pitch, yaw] pose (mm / degrees) into a 4x4
    homogeneous transform whose translation is expressed in metres."""
    x, y, z, roll, pitch, yaw = tcp_pose_mm_deg
    transform = np.eye(4)
    transform[:3, :3] = _rotation_matrix_xyz(roll, pitch, yaw)
    # mm -> m for the translation column.
    transform[:3, 3] = np.array([x, y, z], dtype=np.float64) / 1000.0
    return transform
|
||||||
|
|
||||||
|
|
||||||
|
def _decode_image(storage: pa.Array, metadata: Dict[str, Any]) -> np.ndarray:
|
||||||
|
encoding = str(metadata.get("encoding", "bgr8")).lower()
|
||||||
|
width = metadata.get("width")
|
||||||
|
height = metadata.get("height")
|
||||||
|
if (width is None or height is None) and "shape" in metadata:
|
||||||
|
shape = metadata.get("shape")
|
||||||
|
if isinstance(shape, (list, tuple)) and len(shape) >= 2:
|
||||||
|
height = height if height is not None else int(shape[0])
|
||||||
|
width = width if width is not None else int(shape[1])
|
||||||
|
if width is None or height is None:
|
||||||
|
raise KeyError("width/height (or shape) missing from metadata")
|
||||||
|
|
||||||
|
if encoding == "bgr8":
|
||||||
|
frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
|
||||||
|
return frame.copy()
|
||||||
|
if encoding == "rgb8":
|
||||||
|
frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
|
||||||
|
return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
||||||
|
if encoding in {"jpeg", "jpg", "jpe", "bmp", "webp", "png"}:
|
||||||
|
frame = storage.to_numpy().astype(np.uint8)
|
||||||
|
return cv2.imdecode(frame, cv2.IMREAD_COLOR)
|
||||||
|
if encoding == "yuv420":
|
||||||
|
yuv = storage.to_numpy().astype(np.uint8)
|
||||||
|
yuv = yuv[: width * height * 3 // 2].reshape((height * 3 // 2, width))
|
||||||
|
return cv2.cvtColor(yuv, cv2.COLOR_YUV420p2BGR)
|
||||||
|
|
||||||
|
raise RuntimeError(f"Unsupported image encoding: {encoding}")
|
||||||
|
|
||||||
|
|
||||||
|
def _decode_point_cloud(storage: pa.Array, metadata: Dict[str, Any]) -> np.ndarray:
|
||||||
|
dtype_str = str(metadata.get("dtype", "float32"))
|
||||||
|
if dtype_str != "float32":
|
||||||
|
raise RuntimeError(f"Unsupported point cloud dtype: {dtype_str}")
|
||||||
|
shape = metadata.get("shape")
|
||||||
|
if not isinstance(shape, (list, tuple)) or len(shape) < 3:
|
||||||
|
raise KeyError("point cloud shape missing from metadata")
|
||||||
|
height, width, channels = [int(v) for v in shape[:3]]
|
||||||
|
if channels < 3:
|
||||||
|
raise ValueError("point cloud requires at least 3 channels")
|
||||||
|
raw = storage.to_numpy().astype(np.uint8).tobytes()
|
||||||
|
values = np.frombuffer(raw, dtype=np.float32)
|
||||||
|
return values.reshape((height, width, channels))
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_point(point_xyz: np.ndarray, cfg: DetectionConfig) -> bool:
|
||||||
|
if not np.all(np.isfinite(point_xyz)):
|
||||||
|
return False
|
||||||
|
z = float(point_xyz[2])
|
||||||
|
if z <= 0:
|
||||||
|
return False
|
||||||
|
if z < cfg.min_depth_mm or z > cfg.max_depth_mm:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_point(
    point_cloud: np.ndarray, x: int, y: int, cfg: DetectionConfig
) -> Optional[np.ndarray]:
    """Read a 3-D point at pixel (x, y) from the point cloud.

    When the centre sample is invalid, fall back to the per-axis median of
    all valid samples in a 7x7 neighbourhood. Returns None when (x, y) is out
    of bounds or no valid sample exists.
    """
    height, width = point_cloud.shape[:2]
    if not (0 <= x < width and 0 <= y < height):
        return None
    centre = point_cloud[y, x, :3].astype(np.float64)
    if _valid_point(centre, cfg):
        return centre
    window = 3
    neighbours = []
    for yy in range(y - window, y + window + 1):
        for xx in range(x - window, x + window + 1):
            if 0 <= xx < width and 0 <= yy < height:
                candidate = point_cloud[yy, xx, :3].astype(np.float64)
                if _valid_point(candidate, cfg):
                    neighbours.append(candidate)
    if neighbours:
        return np.median(np.stack(neighbours, axis=0), axis=0)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _dominant_color(image: np.ndarray, bbox: List[int]) -> Tuple[int, int, int]:
|
||||||
|
x1, y1, x2, y2 = bbox
|
||||||
|
x1 = max(0, x1)
|
||||||
|
y1 = max(0, y1)
|
||||||
|
x2 = min(image.shape[1], x2)
|
||||||
|
y2 = min(image.shape[0], y2)
|
||||||
|
roi = image[y1:y2, x1:x2]
|
||||||
|
if roi.size == 0:
|
||||||
|
return (0, 0, 0)
|
||||||
|
color = np.median(roi, axis=(0, 1)).astype(int)
|
||||||
|
return int(color[0]), int(color[1]), int(color[2])
|
||||||
|
|
||||||
|
|
||||||
|
def _closest_color(color: Tuple[int, int, int], cfg: DetectionConfig) -> str:
|
||||||
|
colors = {
|
||||||
|
"blue": np.array(cfg.color_blue, dtype=np.float64),
|
||||||
|
"red": np.array(cfg.color_red, dtype=np.float64),
|
||||||
|
"yellow": np.array(cfg.color_yellow, dtype=np.float64),
|
||||||
|
"white": np.array(cfg.color_white, dtype=np.float64),
|
||||||
|
}
|
||||||
|
color_vec = np.array(color, dtype=np.float64)
|
||||||
|
best_name = "unknown"
|
||||||
|
best_dist = float("inf")
|
||||||
|
for name, value in colors.items():
|
||||||
|
dist = np.linalg.norm(color_vec - value)
|
||||||
|
if dist < best_dist:
|
||||||
|
best_name = name
|
||||||
|
best_dist = dist
|
||||||
|
return best_name
|
||||||
|
|
||||||
|
|
||||||
|
def _load_calibration(calibration_file: str) -> np.ndarray:
|
||||||
|
calib = np.load(calibration_file, allow_pickle=True)
|
||||||
|
t_cam2gripper = calib["T_cam2gripper"]
|
||||||
|
return t_cam2gripper
|
||||||
|
|
||||||
|
|
||||||
|
def _load_config_file(path: str) -> Dict[str, Any]:
|
||||||
|
if not path or not os.path.exists(path):
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
with open(path, "rb") as handle:
|
||||||
|
return tomllib.load(handle)
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_config(config_path: str) -> DetectionConfig:
    """Assemble a DetectionConfig from the TOML file plus environment overrides.

    Precedence for each value: environment variable > config.toml entry >
    hard-coded default (the TOML value is injected as the env default string).
    """
    cfg_data = _load_config_file(config_path)
    roi_cfg = cfg_data.get("roi", {})
    colors_cfg = cfg_data.get("dominant_colors", {})
    obj_cfg = cfg_data.get("object_parameters", {})

    # YOLO inference parameters: env-only, no config.toml section for these.
    imgsz = int(os.getenv("YOLO_IMGSZ", "640"))
    conf = float(os.getenv("YOLO_CONF", "0.25"))
    iou = float(os.getenv("YOLO_IOU", "0.45"))
    size_threshold = int(
        os.getenv("SIZE_THRESHOLD", str(obj_cfg.get("size_threshold", 4200)))
    )
    # ROI corners: TOML lists are serialized back to "x,y" so the same parser
    # handles both the env override and the config value.
    roi_top_left = _parse_int_pair(
        os.getenv(
            "ROI_TOP_LEFT",
            ",".join([str(v) for v in roi_cfg.get("roi_top_left", [500, 230])]),
        ),
        (500, 230),
    )
    roi_bottom_right = _parse_int_pair(
        os.getenv(
            "ROI_BOTTOM_RIGHT",
            ",".join([str(v) for v in roi_cfg.get("roi_bottom_right", [775, 510])]),
        ),
        (775, 510),
    )
    use_roi = os.getenv("USE_ROI", "true").lower() in ("true", "1", "yes")
    detect_every_n = int(os.getenv("DETECT_EVERY_N", "3"))
    min_depth_mm = float(os.getenv("MIN_DEPTH_MM", "10"))
    max_depth_mm = float(os.getenv("MAX_DEPTH_MM", "600"))
    # Reference colors — presumably BGR triples (blue default is [255, 0, 0],
    # which matches OpenCV BGR frames); confirm if ever fed RGB data.
    color_blue = _parse_color(
        os.getenv(
            "COLOR_BLUE",
            ",".join([str(v) for v in colors_cfg.get("blue", [255, 0, 0])]),
        ),
        (255, 0, 0),
    )
    color_red = _parse_color(
        os.getenv(
            "COLOR_RED",
            ",".join([str(v) for v in colors_cfg.get("red", [0, 0, 255])]),
        ),
        (0, 0, 255),
    )
    color_yellow = _parse_color(
        os.getenv(
            "COLOR_YELLOW",
            ",".join([str(v) for v in colors_cfg.get("yellow", [0, 255, 255])]),
        ),
        (0, 255, 255),
    )
    color_white = _parse_color(
        os.getenv(
            "COLOR_WHITE",
            ",".join([str(v) for v in colors_cfg.get("white", [255, 255, 255])]),
        ),
        (255, 255, 255),
    )
    return DetectionConfig(
        imgsz=imgsz,
        conf=conf,
        iou=iou,
        size_threshold=size_threshold,
        roi_top_left=roi_top_left,
        roi_bottom_right=roi_bottom_right,
        use_roi=use_roi,
        detect_every_n=detect_every_n,
        min_depth_mm=min_depth_mm,
        max_depth_mm=max_depth_mm,
        color_blue=color_blue,
        color_red=color_red,
        color_yellow=color_yellow,
        color_white=color_white,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _within_roi(bbox: List[int], cfg: DetectionConfig) -> bool:
|
||||||
|
if not cfg.use_roi:
|
||||||
|
return True
|
||||||
|
x1, y1, x2, y2 = bbox
|
||||||
|
rx1, ry1 = cfg.roi_top_left
|
||||||
|
rx2, ry2 = cfg.roi_bottom_right
|
||||||
|
return x1 >= rx1 and y1 >= ry1 and x2 <= rx2 and y2 <= ry2
|
||||||
|
|
||||||
|
|
||||||
|
def _draw_detections(
    frame: np.ndarray, objects: List[Dict[str, Any]], cfg: DetectionConfig
) -> np.ndarray:
    """Draw the ROI, per-object bounding boxes, labels and base-frame
    positions on a copy of *frame*; the input frame is not modified."""
    annotated = frame.copy()

    # Draw ROI rectangle (always visible; grey when ROI filtering is off)
    cv2.rectangle(
        annotated,
        cfg.roi_top_left,
        cfg.roi_bottom_right,
        (0, 255, 0) if cfg.use_roi else (128, 128, 128),
        2,
    )
    # Label the ROI
    cv2.putText(
        annotated,
        "ROI",
        (cfg.roi_top_left[0] + 5, cfg.roi_top_left[1] + 20),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (0, 255, 0) if cfg.use_roi else (128, 128, 128),
        2,
    )

    # Color mapping for visualization (BGR draw colors, keyed by color name)
    color_map = {
        "blue": (255, 100, 0),
        "red": (0, 0, 255),
        "yellow": (0, 255, 255),
        "white": (200, 200, 200),
        "unknown": (128, 128, 128),
    }

    for obj in objects:
        bbox = obj.get("bbox", [0, 0, 0, 0])
        color_name = obj.get("color", "unknown")
        obj_type = obj.get("object_type", "?")
        size = obj.get("size", "?")
        pos = obj.get("position_mm", [0, 0, 0])

        color = color_map.get(color_name, (128, 128, 128))

        # Draw bounding box
        cv2.rectangle(annotated, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)

        # Draw label background sized to the rendered text
        label = f"{obj_type} {color_name} {size}"
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.rectangle(
            annotated,
            (bbox[0], bbox[1] - th - 8),
            (bbox[0] + tw + 4, bbox[1]),
            color,
            -1,
        )
        cv2.putText(
            annotated,
            label,
            (bbox[0] + 2, bbox[1] - 4),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 0),
            1,
        )

        # Draw position (robot-base mm) just below the box
        pos_label = f"[{pos[0]:.0f}, {pos[1]:.0f}, {pos[2]:.0f}]"
        cv2.putText(
            annotated,
            pos_label,
            (bbox[0], bbox[3] + 15),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.4,
            color,
            1,
        )

    return annotated
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Detector node entry point.

    Caches the latest TCP pose and point cloud, runs YOLO on every N-th image
    frame, projects each detection's pixel into the robot base frame via the
    hand-eye calibration, then publishes a JSON object list plus an annotated
    BGR image.
    """
    image_input = os.getenv("IMAGE_INPUT", "image_bgr")
    point_cloud_input = os.getenv("POINT_CLOUD_INPUT", "point_cloud")
    pose_input = os.getenv("POSE_INPUT", "tcp_pose")
    objects_output = os.getenv("OBJECTS_OUTPUT", "objects")
    image_output = os.getenv("IMAGE_OUTPUT", "image_annotated")
    calibration_file = os.getenv("CALIBRATION_FILE", "calibration.npz")
    weights = os.getenv("DETECTOR_WEIGHTS", DEFAULT_WEIGHTS)
    config_file = os.getenv("CONFIG_FILE", "config.toml")

    cfg = _build_config(config_file)
    model = YOLO(weights)

    # Relative calibration paths are preferred from a local "config/" dir
    # when such a file exists there.
    if calibration_file and not os.path.isabs(calibration_file):
        config_path = os.path.join("config", calibration_file)
        calibration_file = config_path if os.path.exists(config_path) else calibration_file
    t_cam2gripper = _load_calibration(calibration_file)

    node = Node()
    # Latest-value caches; detection is skipped until both are populated.
    latest_pose: Optional[List[float]] = None
    latest_pose_at: Optional[float] = None  # NOTE(review): set but never read
    latest_point_cloud: Optional[np.ndarray] = None
    latest_pc_at: Optional[float] = None  # NOTE(review): set but never read
    frame_count = 0

    for event in node:
        if event["type"] != "INPUT":
            continue

        now = time.monotonic()
        # Cache the newest 6-DOF TCP pose ([x, y, z, roll, pitch, yaw]).
        if event["id"] == pose_input:
            tcp_pose = event["value"].to_numpy().astype(np.float64).reshape(-1)
            if tcp_pose.size >= 6:
                latest_pose = tcp_pose[:6].tolist()
                latest_pose_at = now
            continue

        # Cache the newest point cloud.
        if event["id"] == point_cloud_input:
            latest_point_cloud = _decode_point_cloud(event["value"], event.get("metadata", {}))
            latest_pc_at = now
            continue

        if event["id"] != image_input:
            continue

        # Throttle: only run YOLO on every detect_every_n-th frame.
        frame_count += 1
        if frame_count % max(1, cfg.detect_every_n) != 0:
            continue

        # Need both a pose and a point cloud to localize detections.
        if latest_pose is None or latest_point_cloud is None:
            continue

        frame = _decode_image(event["value"], event.get("metadata", {}))

        results = model.predict(
            frame, imgsz=cfg.imgsz, conf=cfg.conf, iou=cfg.iou, verbose=False
        )[0]

        base_T_flange = _pose_to_matrix(latest_pose)
        objects: List[Dict[str, Any]] = []
        for r in results.boxes:
            bbox = [int(x) for x in r.xyxy[0]]
            if not _within_roi(bbox, cfg):
                continue
            # Sample the point cloud at the bbox centre pixel.
            cx = int((bbox[0] + bbox[2]) / 2)
            cy = int((bbox[1] + bbox[3]) / 2)
            point_cam_mm = _sample_point(latest_point_cloud, cx, cy, cfg)
            if point_cam_mm is None:
                continue
            # Homogeneous camera point, converted mm -> m, then chained
            # through camera->gripper and flange->base transforms.
            point_cam_m = np.array(
                [point_cam_mm[0], point_cam_mm[1], point_cam_mm[2], 1.0],
                dtype=np.float64,
            )
            point_cam_m[:3] /= 1000.0
            point_base = base_T_flange @ t_cam2gripper @ point_cam_m
            point_base_mm = point_base[:3] * 1000.0

            dominant = _dominant_color(frame, bbox)
            color_name = _closest_color(dominant, cfg)
            area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
            size_label = "big" if area >= cfg.size_threshold else "small"

            objects.append(
                {
                    "object_type": results.names[int(r.cls.item())],
                    "confidence": float(r.conf.item()),
                    "color": color_name,
                    "size": size_label,
                    "bbox": bbox,
                    "center_px": [cx, cy],
                    "position_mm": [
                        float(point_base_mm[0]),
                        float(point_base_mm[1]),
                        float(point_base_mm[2]),
                    ],
                    "timestamp_ns": time.time_ns(),
                }
            )

        # Publish the detections as a single JSON string.
        payload = json.dumps({"objects": objects, "timestamp_ns": time.time_ns()})
        node.send_output(
            objects_output,
            pa.array([payload]),
            metadata={"encoding": "json", "timestamp_ns": time.time_ns()},
        )

        # Send annotated image
        annotated = _draw_detections(frame, objects, cfg)
        h, w = annotated.shape[:2]
        node.send_output(
            image_output,
            pa.array(annotated.ravel().tolist()),
            metadata={
                "encoding": "bgr8",
                "width": w,
                "height": h,
                "timestamp_ns": time.time_ns(),
            },
        )


if __name__ == "__main__":
    main()
|
||||||
19
dora_detector/pyproject.toml
Normal file
19
dora_detector/pyproject.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[project]
name = "dora-detector"
version = "0.1.0"
# PEP 621: `file` must point to an actual license file in the distribution;
# "MIT" is a license expression, so declare it with `text` instead.
license = { text = "MIT" }
authors = [{ name = "Dora" }]
description = "Dora node for YOLO-based object detection with ZED point cloud"

requires-python = ">=3.8"

dependencies = [
    "dora-rs >= 0.3.9",
    "numpy < 2.0.0",
    "opencv-python >= 4.1.1",
    "pyarrow >= 12.0.0",
    # main.py falls back to `import tomli as tomllib` on Python < 3.11;
    # declare the backport so that fallback can actually resolve.
    "tomli >= 1.1.0; python_version < '3.11'",
    "ultralytics >= 8.0.0",
]

[project.scripts]
dora-detector = "dora_detector.main:main"
|
||||||
178
dora_iobridge/README.md
Normal file
178
dora_iobridge/README.md
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
# Dora IOBridge Node
|
||||||
|
|
||||||
|
A WebSocket server that bridges web clients with the Dora dataflow for real-time voice commands and scene updates.
|
||||||
|
|
||||||
|
## Inputs/Outputs
|
||||||
|
|
||||||
|
| Input | Type | Description |
|
||||||
|
|----------------|--------|---------------------------------------|
|
||||||
|
| `voice_out` | JSON | Response from voice control node |
|
||||||
|
| `scene_update` | JSON | Scene objects from voice control |
|
||||||
|
|
||||||
|
| Output | Type | Description |
|
||||||
|
|----------------|--------|---------------------------------------|
|
||||||
|
| `voice_in` | string | Voice commands forwarded to Dora |
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
VOICE_HOST=0.0.0.0 # Bind address
|
||||||
|
VOICE_PORT=8765 # Listen port
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd dora_iobridge
|
||||||
|
pip install -e .
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
### Test with WebSocket (wscat)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install wscat
|
||||||
|
npm install -g wscat
|
||||||
|
|
||||||
|
# Connect to the server
|
||||||
|
wscat -c ws://localhost:8765
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test with websocat
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install websocat
|
||||||
|
# Ubuntu: sudo apt install websocat
|
||||||
|
# macOS: brew install websocat
|
||||||
|
|
||||||
|
# Send a ping
|
||||||
|
echo '{"type": "ping"}' | websocat ws://localhost:8765
|
||||||
|
# Response: {"type": "pong"}
|
||||||
|
|
||||||
|
# Send a voice command
|
||||||
|
echo '{"type": "command", "text": "sube"}' | websocat ws://localhost:8765
|
||||||
|
|
||||||
|
# Request scene refresh
|
||||||
|
echo '{"type": "scene_refresh"}' | websocat ws://localhost:8765
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test with Python
|
||||||
|
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
import websockets
|
||||||
|
import json
|
||||||
|
|
||||||
|
async def test_iobridge():
|
||||||
|
uri = "ws://localhost:8765"
|
||||||
|
async with websockets.connect(uri) as ws:
|
||||||
|
# Test ping
|
||||||
|
await ws.send(json.dumps({"type": "ping"}))
|
||||||
|
response = await ws.recv()
|
||||||
|
print(f"Ping response: {response}")
|
||||||
|
|
||||||
|
# Send command
|
||||||
|
await ws.send(json.dumps({
|
||||||
|
"type": "command",
|
||||||
|
"text": "agarra el cubo rojo"
|
||||||
|
}))
|
||||||
|
|
||||||
|
# Listen for responses
|
||||||
|
async for message in ws:
|
||||||
|
data = json.loads(message)
|
||||||
|
print(f"Received: {data}")
|
||||||
|
|
||||||
|
asyncio.run(test_iobridge())
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test with curl (HTTP upgrade not supported directly)
|
||||||
|
|
||||||
|
Since WebSocket requires an upgrade handshake, use this shell script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# test_iobridge.sh
|
||||||
|
|
||||||
|
# Using websocat for interactive testing
|
||||||
|
websocat ws://localhost:8765 <<EOF
|
||||||
|
{"type": "ping"}
|
||||||
|
{"type": "command", "text": "sube"}
|
||||||
|
{"type": "scene_refresh"}
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
## WebSocket Message Types
|
||||||
|
|
||||||
|
### Client -> Server
|
||||||
|
|
||||||
|
**Command (voice input)**
|
||||||
|
```json
|
||||||
|
{"type": "command", "text": "agarra el cubo rojo"}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Ping (health check)**
|
||||||
|
```json
|
||||||
|
{"type": "ping"}
|
||||||
|
```
|
||||||
|
Response: `{"type": "pong"}`
|
||||||
|
|
||||||
|
**Scene Refresh**
|
||||||
|
```json
|
||||||
|
{"type": "scene_refresh"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server -> Client (Broadcasts)
|
||||||
|
|
||||||
|
**Command Response**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "response",
|
||||||
|
"text": "Ok, voy a tomar",
|
||||||
|
"status": "ok"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Scene Update**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "scene_updated",
|
||||||
|
"objects": [
|
||||||
|
{
|
||||||
|
"object_type": "cubo",
|
||||||
|
"color": "rojo",
|
||||||
|
"size": "big",
|
||||||
|
"position_mm": [150.0, 200.0, 280.0],
|
||||||
|
"source": "detection"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dora Dataflow Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
nodes:
|
||||||
|
- id: iobridge
|
||||||
|
build: pip install -e ./dora_iobridge
|
||||||
|
path: dora_iobridge
|
||||||
|
inputs:
|
||||||
|
voice_out: voice_control/voice_out
|
||||||
|
scene_update: voice_control/scene_update
|
||||||
|
outputs:
|
||||||
|
- voice_in
|
||||||
|
env:
|
||||||
|
VOICE_HOST: "0.0.0.0"
|
||||||
|
VOICE_PORT: "8765"
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dora up
|
||||||
|
dora start dataflow.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- dora-rs >= 0.3.9
|
||||||
|
- pyarrow >= 12.0.0
|
||||||
|
- websockets >= 12.0
|
||||||
1
dora_iobridge/dora_iobridge/__init__.py
Normal file
1
dora_iobridge/dora_iobridge/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Dora IO bridge node package."""
|
||||||
145
dora_iobridge/dora_iobridge/main.py
Normal file
145
dora_iobridge/dora_iobridge/main.py
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
"""Dora node bridging WebSocket IO to Dora topics."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, Optional, Set
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
from dora import Node
|
||||||
|
from websockets.server import serve, WebSocketServerProtocol
|
||||||
|
|
||||||
|
|
||||||
|
class IoBridgeServer:
|
||||||
|
    def __init__(self, host: str, port: int):
        """WebSocket fan-out server bridging web clients to the Dora node.

        Args:
            host: bind address for the WebSocket server.
            port: listen port.
        """
        self.host = host
        self.port = port
        # Currently connected clients; broadcast() targets all of them.
        self.clients: Set[WebSocketServerProtocol] = set()
        # Async callables installed by the owning node; awaited when a
        # "command" / "scene_refresh" message arrives. None until registered.
        self.command_handler = None
        self.scene_refresh_handler = None
|
||||||
|
|
||||||
|
async def handler(self, websocket: WebSocketServerProtocol):
|
||||||
|
self.clients.add(websocket)
|
||||||
|
try:
|
||||||
|
async for message in websocket:
|
||||||
|
try:
|
||||||
|
data = json.loads(message)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
await websocket.send(
|
||||||
|
json.dumps({"type": "error", "text": "Invalid JSON message"})
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
response = await self._route_message(data, websocket)
|
||||||
|
if response:
|
||||||
|
await websocket.send(json.dumps(response))
|
||||||
|
finally:
|
||||||
|
self.clients.discard(websocket)
|
||||||
|
|
||||||
|
async def _route_message(
|
||||||
|
self, data: Dict[str, Any], websocket: WebSocketServerProtocol
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
msg_type = data.get("type")
|
||||||
|
if msg_type == "command":
|
||||||
|
text = data.get("text", "")
|
||||||
|
if self.command_handler:
|
||||||
|
await self.command_handler(text)
|
||||||
|
return None
|
||||||
|
return {"type": "error", "text": "No command handler registered"}
|
||||||
|
if msg_type == "ping":
|
||||||
|
return {"type": "pong"}
|
||||||
|
if msg_type == "scene_refresh":
|
||||||
|
if self.scene_refresh_handler:
|
||||||
|
objects = await self.scene_refresh_handler()
|
||||||
|
return {"type": "scene_updated", "objects": objects}
|
||||||
|
return {"type": "error", "text": "No scene handler registered"}
|
||||||
|
return {"type": "error", "text": f"Unknown message type: {msg_type}"}
|
||||||
|
|
||||||
|
async def broadcast(self, message: Dict[str, Any]):
|
||||||
|
if not self.clients:
|
||||||
|
return
|
||||||
|
payload = json.dumps(message)
|
||||||
|
await asyncio.gather(
|
||||||
|
*[client.send(payload) for client in self.clients], return_exceptions=True
|
||||||
|
)
|
||||||
|
|
||||||
|
async def send(self, message: Dict[str, Any], websocket: WebSocketServerProtocol):
|
||||||
|
await websocket.send(json.dumps(message))
|
||||||
|
|
||||||
|
async def start(self):
|
||||||
|
async with serve(self.handler, self.host, self.port):
|
||||||
|
await asyncio.Future()
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Run the IO bridge: WebSocket server in a background thread, Dora loop here.

    Inbound WebSocket "command" messages are forwarded to Dora as ``voice_in``;
    Dora ``voice_out`` / ``scene_update`` inputs are broadcast to all clients.
    """
    host = os.getenv("VOICE_HOST", "0.0.0.0")
    port = int(os.getenv("VOICE_PORT", "8765"))
    input_topic = os.getenv("VOICE_IN_OUTPUT", "voice_in")
    response_input = os.getenv("VOICE_OUT_INPUT", "voice_out")
    scene_input = os.getenv("SCENE_INPUT", "scene_update")

    node = Node()
    server = IoBridgeServer(host, port)
    loop = asyncio.new_event_loop()

    def push_command(text: str) -> None:
        # NOTE(review): runs on the WebSocket server thread while the main
        # thread also drives `node`; assumes Node.send_output is thread-safe —
        # confirm against the dora-rs API.
        node.send_output(
            input_topic,
            pa.array([text]),
            metadata={"encoding": "utf-8", "timestamp_ns": time.time_ns()},
        )

    async def handle_scene_refresh():
        # Placeholder: scene data is pushed via the `scene_update` input instead.
        return []

    async def command_handler(text: str):
        # BUG FIX: must be a coroutine — IoBridgeServer._route_message does
        # `await self.command_handler(text)`; the previous sync version made
        # that `await None` raise TypeError on every command.
        push_command(text)
        return None

    server.command_handler = command_handler
    server.scene_refresh_handler = handle_scene_refresh

    def run_server():
        # Dedicated event loop for the WebSocket server thread.
        asyncio.set_event_loop(loop)
        loop.run_until_complete(server.start())

    threading.Thread(target=run_server, daemon=True).start()

    def first_string(event) -> str:
        # Dora inputs carry an arrow array; take its first element if present.
        return event["value"][0].as_py() if len(event["value"]) else ""

    for event in node:
        if event["type"] != "INPUT":
            continue

        if event["id"] == response_input:
            raw = first_string(event)
            if not raw:
                continue
            try:
                payload = json.loads(raw)
                message = {
                    "type": "response",
                    "text": payload.get("text", ""),
                    "status": payload.get("status", "ok"),
                }
            except Exception:
                # Not JSON: forward the raw text verbatim.
                message = {"type": "response", "text": raw, "status": "ok"}
            # Hand the broadcast to the server thread's loop (thread-safe).
            asyncio.run_coroutine_threadsafe(server.broadcast(message), loop)
            continue

        if event["id"] == scene_input:
            raw = first_string(event)
            if not raw:
                continue
            try:
                payload = json.loads(raw)
                objects = payload.get("objects", [])
                message = {"type": "scene_updated", "objects": objects}
            except Exception:
                message = {"type": "scene_updated", "objects": []}
            asyncio.run_coroutine_threadsafe(server.broadcast(message), loop)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
17
dora_iobridge/pyproject.toml
Normal file
17
dora_iobridge/pyproject.toml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
[project]
|
||||||
|
name = "dora-iobridge"
|
||||||
|
version = "0.1.0"
|
||||||
|
license = { text = "MIT" }
|
||||||
|
authors = [{ name = "Dora" }]
|
||||||
|
description = "Dora node bridging WebSocket IO to Dora topics"
|
||||||
|
|
||||||
|
requires-python = ">=3.8"
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"dora-rs >= 0.3.9",
|
||||||
|
"pyarrow >= 12.0.0",
|
||||||
|
"websockets >= 12.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
dora-iobridge = "dora_iobridge.main:main"
|
||||||
@@ -781,6 +781,12 @@ def _status_snapshot(helper: ULite6Helper) -> Dict[str, Any]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _log(msg: str) -> None:
|
||||||
|
"""Print a timestamped log message."""
|
||||||
|
timestamp = time.strftime("%H:%M:%S")
|
||||||
|
print(f"[ulite6 {timestamp}] {msg}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
node = Node()
|
node = Node()
|
||||||
|
|
||||||
@@ -791,7 +797,42 @@ def main() -> None:
|
|||||||
api_port = int(os.getenv("API_PORT", "8080"))
|
api_port = int(os.getenv("API_PORT", "8080"))
|
||||||
vacuum_enabled = os.getenv("VACUUM_ENABLED", "false").lower() in ("true", "1", "yes")
|
vacuum_enabled = os.getenv("VACUUM_ENABLED", "false").lower() in ("true", "1", "yes")
|
||||||
|
|
||||||
|
# Initial position settings
|
||||||
|
init_mode = os.getenv("INIT_MODE", "none").lower() # "home", "pose", or "none"
|
||||||
|
init_x = float(os.getenv("INIT_X", "300.0"))
|
||||||
|
init_y = float(os.getenv("INIT_Y", "0.0"))
|
||||||
|
init_z = float(os.getenv("INIT_Z", "250.0"))
|
||||||
|
init_roll = float(os.getenv("INIT_ROLL", "180.0"))
|
||||||
|
init_pitch = float(os.getenv("INIT_PITCH", "0.0"))
|
||||||
|
init_yaw = float(os.getenv("INIT_YAW", "0.0"))
|
||||||
|
init_speed = float(os.getenv("INIT_SPEED", "50.0"))
|
||||||
|
|
||||||
|
_log(f"Connecting to robot at {robot_ip}...")
|
||||||
helper = ULite6Helper(robot_ip)
|
helper = ULite6Helper(robot_ip)
|
||||||
|
_log("Robot connected")
|
||||||
|
|
||||||
|
# Move to initial position on startup
|
||||||
|
if init_mode == "home":
|
||||||
|
_log("Moving to home position...")
|
||||||
|
code = helper.go_home()
|
||||||
|
if code == 0:
|
||||||
|
_log("Home position reached")
|
||||||
|
else:
|
||||||
|
_log(f"Home failed with code {code}")
|
||||||
|
elif init_mode == "pose":
|
||||||
|
_log(f"Moving to initial pose: [{init_x}, {init_y}, {init_z}] roll={init_roll} pitch={init_pitch} yaw={init_yaw}")
|
||||||
|
code = helper.move_to_pose(
|
||||||
|
init_x, init_y, init_z,
|
||||||
|
init_roll, init_pitch, init_yaw,
|
||||||
|
speed=init_speed,
|
||||||
|
units="mm",
|
||||||
|
)
|
||||||
|
if code == 0:
|
||||||
|
_log("Initial pose reached")
|
||||||
|
else:
|
||||||
|
_log(f"Move to initial pose failed with code {code}")
|
||||||
|
else:
|
||||||
|
_log("Skipping initial position (INIT_MODE=none)")
|
||||||
|
|
||||||
# Create and start FastAPI server in background thread
|
# Create and start FastAPI server in background thread
|
||||||
app = create_api(helper, default_speed, default_units, vacuum_enabled)
|
app = create_api(helper, default_speed, default_units, vacuum_enabled)
|
||||||
@@ -895,6 +936,28 @@ def main() -> None:
|
|||||||
code=code,
|
code=code,
|
||||||
status=_status_snapshot(helper),
|
status=_status_snapshot(helper),
|
||||||
)
|
)
|
||||||
|
elif action in ("vacuum_on", "vacuum_off"):
|
||||||
|
if not vacuum_enabled:
|
||||||
|
_send_command_status(
|
||||||
|
node,
|
||||||
|
command_id=command_id,
|
||||||
|
action=action,
|
||||||
|
ok=False,
|
||||||
|
message="Vacuum gripper not enabled",
|
||||||
|
status=_status_snapshot(helper),
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
vacuum_on = action == "vacuum_on"
|
||||||
|
code = helper.set_vacuum_gripper(vacuum_on)
|
||||||
|
_send_command_status(
|
||||||
|
node,
|
||||||
|
command_id=command_id,
|
||||||
|
action=action,
|
||||||
|
ok=code == 0,
|
||||||
|
message="Vacuum command executed",
|
||||||
|
code=code,
|
||||||
|
status=_status_snapshot(helper),
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
_send_command_status(
|
_send_command_status(
|
||||||
node,
|
node,
|
||||||
|
|||||||
211
dora_voice_control/README.md
Normal file
211
dora_voice_control/README.md
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
# Dora Voice Control Node
|
||||||
|
|
||||||
|
A Dora node that processes Spanish voice commands from children and translates them into robot actions (movement, grasping, releasing objects). Includes a web debug interface.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Spanish voice command parsing (rule-based or Gemini LLM)
|
||||||
|
- Real-time web debug interface
|
||||||
|
- Command queue management
|
||||||
|
- Workspace bounds validation
|
||||||
|
- Object detection integration
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
dora_voice_control/
|
||||||
|
├── __init__.py
|
||||||
|
├── main.py # Main Dora node entry point
|
||||||
|
├── api.py # FastAPI web server
|
||||||
|
├── config.py # Configuration management
|
||||||
|
├── models.py # Pydantic request/response models
|
||||||
|
├── parser.py # Voice command parsing logic
|
||||||
|
├── state.py # Shared state management
|
||||||
|
└── templates.py # HTML template for web interface
|
||||||
|
```
|
||||||
|
|
||||||
|
## Web Debug Interface
|
||||||
|
|
||||||
|
Access the debug interface at `http://localhost:8080` (default).
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Real-time status monitoring (pose, objects, queue)
|
||||||
|
- Send manual voice commands
|
||||||
|
- Quick command buttons
|
||||||
|
- View parse results
|
||||||
|
- Command history
|
||||||
|
- Clear queue
|
||||||
|
|
||||||
|
## Inputs/Outputs
|
||||||
|
|
||||||
|
| Input | Type | Description |
|
||||||
|
|---------------|--------|------------------------------------------|
|
||||||
|
| `voice_in` | string | Text transcription of voice command |
|
||||||
|
| `tcp_pose` | array | Current robot pose [x, y, z, roll, pitch, yaw] |
|
||||||
|
| `objects` | JSON | Detected objects from vision system |
|
||||||
|
| `status` | JSON | Command execution status from robot |
|
||||||
|
|
||||||
|
| Output | Type | Description |
|
||||||
|
|---------------|--------|------------------------------------------|
|
||||||
|
| `robot_cmd` | JSON | Robot command with action and payload |
|
||||||
|
| `voice_out` | JSON | Response confirmation to user |
|
||||||
|
| `scene_update`| JSON | Updated scene with all visible objects |
|
||||||
|
|
||||||
|
## Supported Commands (Spanish)
|
||||||
|
|
||||||
|
| Command | Action | Example |
|
||||||
|
|---------------|----------------|--------------------------------|
|
||||||
|
| `subir` | Move up | "sube" |
|
||||||
|
| `bajar` | Move down | "baja" |
|
||||||
|
| `tomar` | Grab object | "agarra el cubo rojo" |
|
||||||
|
| `soltar` | Release object | "suelta en la caja azul" |
|
||||||
|
| `ir` | Go to object | "ve al cilindro" |
|
||||||
|
| `reiniciar` | Reset | "reinicia" |
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Web API Server
|
||||||
|
API_ENABLED=true # Enable/disable web interface
|
||||||
|
API_HOST=0.0.0.0 # Bind address
|
||||||
|
API_PORT=8080 # Listen port
|
||||||
|
|
||||||
|
# TCP Parameters
|
||||||
|
TCP_OFFSET_MM=63.0 # Z-offset to object surface
|
||||||
|
APPROACH_OFFSET_MM=50.0 # Safe approach distance above object
|
||||||
|
STEP_MM=20.0 # Distance for up/down increments
|
||||||
|
|
||||||
|
# LLM Configuration (optional)
|
||||||
|
LLM_PROVIDER=rules # "rules" or "gemini"
|
||||||
|
GOOGLE_API_KEY=your_key # Required if using gemini
|
||||||
|
GEMINI_MODEL=gemini-2.0-flash
|
||||||
|
|
||||||
|
# Workspace Safety (optional)
|
||||||
|
WORKSPACE_MIN_X=-300
|
||||||
|
WORKSPACE_MAX_X=300
|
||||||
|
WORKSPACE_MIN_Y=-300
|
||||||
|
WORKSPACE_MAX_Y=300
|
||||||
|
WORKSPACE_MIN_Z=0
|
||||||
|
WORKSPACE_MAX_Z=500
|
||||||
|
|
||||||
|
# Misc
|
||||||
|
DRY_RUN=false # Skip sending robot commands
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd dora_voice_control
|
||||||
|
pip install -e .
|
||||||
|
|
||||||
|
# With LLM support
|
||||||
|
pip install -e ".[llm]"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
### Web Interface
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start the node (standalone for testing)
|
||||||
|
python -m dora_voice_control.main
|
||||||
|
|
||||||
|
# Open in browser
|
||||||
|
open http://localhost:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
### API Endpoints
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get status
|
||||||
|
curl http://localhost:8080/api/status
|
||||||
|
|
||||||
|
# Get objects
|
||||||
|
curl http://localhost:8080/api/objects
|
||||||
|
|
||||||
|
# Get queue
|
||||||
|
curl http://localhost:8080/api/queue
|
||||||
|
|
||||||
|
# Send command
|
||||||
|
curl -X POST http://localhost:8080/api/command \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"text": "sube"}'
|
||||||
|
|
||||||
|
# Clear queue
|
||||||
|
curl -X POST http://localhost:8080/api/queue/clear
|
||||||
|
```
|
||||||
|
|
||||||
|
### Python Test
|
||||||
|
|
||||||
|
```python
|
||||||
|
from dora_voice_control.parser import rule_parse, normalize
|
||||||
|
|
||||||
|
# Test command parsing
|
||||||
|
text = "agarra el cubo rojo grande"
|
||||||
|
result = rule_parse(normalize(text))
|
||||||
|
print(result)
|
||||||
|
# {'resultado': 'ok', 'accion': 'tomar', 'objeto': 'cubo', 'color': 'rojo', 'tamano': 'grande'}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dora Dataflow Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
nodes:
|
||||||
|
- id: voice_control
|
||||||
|
build: pip install -e ./dora_voice_control
|
||||||
|
path: dora_voice_control
|
||||||
|
inputs:
|
||||||
|
voice_in: iobridge/voice_in
|
||||||
|
tcp_pose: robot/tcp_pose
|
||||||
|
objects: detector/objects
|
||||||
|
status: robot/status
|
||||||
|
outputs:
|
||||||
|
- robot_cmd
|
||||||
|
- voice_out
|
||||||
|
- scene_update
|
||||||
|
env:
|
||||||
|
API_ENABLED: "true"
|
||||||
|
API_PORT: "8080"
|
||||||
|
DRY_RUN: "false"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Message Examples
|
||||||
|
|
||||||
|
### Input: voice_in
|
||||||
|
```
|
||||||
|
"sube"
|
||||||
|
"agarra el cubo rojo"
|
||||||
|
"suelta en la caja azul"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Output: robot_cmd
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "550e8400-e29b-41d4-a716-446655440000",
|
||||||
|
"action": "move_to_pose",
|
||||||
|
"payload": {
|
||||||
|
"x": 150.0,
|
||||||
|
"y": 200.0,
|
||||||
|
"z": 280.0,
|
||||||
|
"roll": 180.0,
|
||||||
|
"pitch": 0.0,
|
||||||
|
"yaw": 0.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Output: voice_out
|
||||||
|
```json
|
||||||
|
{"text": "Ok, voy a subir", "status": "ok"}
|
||||||
|
{"text": "No entendi el comando", "status": "error"}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- dora-rs >= 0.3.9
|
||||||
|
- numpy < 2.0.0
|
||||||
|
- pyarrow >= 12.0.0
|
||||||
|
- fastapi >= 0.109.0
|
||||||
|
- uvicorn >= 0.27.0
|
||||||
|
- pydantic >= 2.0.0
|
||||||
|
- google-genai (optional, for Gemini mode)
|
||||||
1
dora_voice_control/dora_voice_control/__init__.py
Normal file
1
dora_voice_control/dora_voice_control/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Dora voice control node package."""
|
||||||
162
dora_voice_control/dora_voice_control/api.py
Normal file
162
dora_voice_control/dora_voice_control/api.py
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
"""FastAPI application for the voice control web interface."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import HTMLResponse, Response
|
||||||
|
|
||||||
|
# Handle both package and direct script execution
|
||||||
|
# __package__ is None when run as script, '' when imported from a script
|
||||||
|
if not __package__:
|
||||||
|
_pkg_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
if _pkg_dir not in sys.path:
|
||||||
|
sys.path.insert(0, _pkg_dir)
|
||||||
|
from models import CommandRequest, CommandResponse
|
||||||
|
from state import SharedState
|
||||||
|
from templates import HTML_TEMPLATE
|
||||||
|
else:
|
||||||
|
from .models import CommandRequest, CommandResponse
|
||||||
|
from .state import SharedState
|
||||||
|
from .templates import HTML_TEMPLATE
|
||||||
|
|
||||||
|
|
||||||
|
def create_api(state: SharedState) -> FastAPI:
    """Create the FastAPI application exposing the voice-control debug endpoints.

    Every endpoint reads from / writes to the given SharedState. Internal
    failures are surfaced as HTTP 500 with the exception text as detail, and
    the original exception is chained (`from e`) so tracebacks stay complete.
    """
    app = FastAPI(
        title="Voice Control Debug API",
        description="Debug interface for the voice control node",
        version="0.1.0",
    )

    @app.get("/", response_class=HTMLResponse)
    def index() -> str:
        """Serve the web interface."""
        return HTML_TEMPLATE

    @app.get("/api/status")
    def get_status() -> dict:
        """Get current status."""
        try:
            return state.get_status()
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/objects")
    def get_objects() -> dict:
        """Get detected and static objects."""
        try:
            return state.get_objects()
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/queue")
    def get_queue() -> list:
        """Get the command queue."""
        try:
            return state.get_queue()
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.post("/api/queue/clear")
    def clear_queue() -> dict:
        """Clear the command queue."""
        try:
            # NOTE(review): reaches into SharedState internals (_lock,
            # voice_state); a public SharedState.clear_queue() would be cleaner.
            with state._lock:
                state.voice_state.queue.clear()
            return {"ok": True}
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/history")
    def get_history() -> list:
        """Get command history."""
        try:
            return state.get_history()
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/errors")
    def get_errors() -> list:
        """Get error log."""
        try:
            return state.get_errors()
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.post("/api/command", response_model=CommandResponse)
    def send_command(request: CommandRequest) -> CommandResponse:
        """Send a voice command through the registered command callback."""
        try:
            callback = state.get_command_callback()
            if callback is None:
                return CommandResponse(ok=False, text="No command handler available", status="error")

            result = callback(request.text)
            return CommandResponse(
                ok=result.get("status") == "ok",
                text=result.get("text", ""),
                status=result.get("status", "error"),
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/image")
    def get_image() -> Response:
        """Get the latest camera image as JPEG (204 when none is available)."""
        try:
            image_data = state.get_image()
            if image_data is None:
                # No frame yet: empty 204 so the UI can poll without errors.
                return Response(
                    content=b"",
                    media_type="image/jpeg",
                    status_code=204,
                )
            return Response(
                content=image_data,
                media_type="image/jpeg",
                headers={"Cache-Control": "no-cache, no-store, must-revalidate"},
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/image/info")
    def get_image_info() -> dict:
        """Get image metadata (availability and age in milliseconds)."""
        try:
            return {
                "has_image": state.get_image() is not None,
                "age_ms": state.get_image_age_ms(),
            }
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    return app
|
||||||
|
|
||||||
|
|
||||||
|
def run_uvicorn(app: FastAPI, host: str, port: int) -> None:
    """Run a uvicorn server for *app* (blocking; run it in a background thread)."""
    settings = uvicorn.Config(app, host=host, port=port, log_level="warning")
    uvicorn.Server(settings).run()
|
||||||
|
|
||||||
|
|
||||||
|
def start_api_server(state: SharedState, config: Any) -> threading.Thread:
    """Start the API server in a daemon thread and return that thread."""
    import time as _time

    worker = threading.Thread(
        target=run_uvicorn,
        args=(create_api(state), config.host, config.port),
        daemon=True,
    )
    worker.start()
    stamp = _time.strftime("%H:%M:%S")
    print(f"[voice_control {stamp}] Web interface at http://{config.host}:{config.port}", flush=True)
    return worker
|
||||||
95
dora_voice_control/dora_voice_control/config.py
Normal file
95
dora_voice_control/dora_voice_control/config.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
"""Configuration for the voice control node."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class VoiceConfig:
    """Configuration for voice control.

    Built from environment variables by load_voice_config(). Distances are
    millimetres and angles degrees (see the TCP_OFFSET_MM / STEP_MM env vars).
    """

    # host/port are placeholders here (load_voice_config fills "" and 0);
    # the web server address lives in ApiConfig instead.
    host: str
    port: int
    # Z-offset from the TCP to the object surface, in mm.
    tcp_offset_mm: float
    # Safe approach distance above an object, in mm.
    approach_offset_mm: float
    # Distance moved per incremental up/down command, in mm.
    step_mm: float
    # Default tool orientation (degrees) used when composing target poses.
    default_roll: float
    default_pitch: float
    default_yaw: float
    # When True, commands are parsed but not sent to the robot (DRY_RUN).
    dry_run: bool
    # Optional per-axis workspace bounds; None means unconstrained on that axis.
    workspace_min: Tuple[Optional[float], Optional[float], Optional[float]]
    workspace_max: Tuple[Optional[float], Optional[float], Optional[float]]
    # Maps detector class names to the object names used in voice commands.
    class_map: Dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ApiConfig:
    """Configuration for the web API server.

    Loaded from API_HOST / API_PORT / API_ENABLED by load_api_config().
    """

    # Bind address for the debug web interface.
    host: str
    # Listen port for the debug web interface.
    port: int
    # Whether the web interface should be started (API_ENABLED).
    enabled: bool
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_float_env(name: str) -> Optional[float]:
|
||||||
|
"""Parse an optional float from environment variable."""
|
||||||
|
raw = os.getenv(name)
|
||||||
|
if raw is None or raw == "":
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(raw)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_class_map(raw: str) -> Dict[str, str]:
|
||||||
|
"""Parse JSON class mapping from string."""
|
||||||
|
import json
|
||||||
|
|
||||||
|
if not raw:
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
if isinstance(data, dict):
|
||||||
|
return {str(k): str(v) for k, v in data.items()}
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def load_voice_config() -> VoiceConfig:
    """Build a VoiceConfig from environment variables, with sane defaults."""
    truthy = ("true", "1", "yes")
    workspace_min = (
        _parse_float_env("WORKSPACE_MIN_X"),
        _parse_float_env("WORKSPACE_MIN_Y"),
        _parse_float_env("WORKSPACE_MIN_Z"),
    )
    workspace_max = (
        _parse_float_env("WORKSPACE_MAX_X"),
        _parse_float_env("WORKSPACE_MAX_Y"),
        _parse_float_env("WORKSPACE_MAX_Z"),
    )
    return VoiceConfig(
        # host/port are unused placeholders; the web server bind lives in ApiConfig.
        host="",
        port=0,
        tcp_offset_mm=float(os.getenv("TCP_OFFSET_MM", "63.0")),
        approach_offset_mm=float(os.getenv("APPROACH_OFFSET_MM", "50.0")),
        step_mm=float(os.getenv("STEP_MM", "20.0")),
        default_roll=float(os.getenv("DEFAULT_ROLL", "180.0")),
        default_pitch=float(os.getenv("DEFAULT_PITCH", "0.0")),
        default_yaw=float(os.getenv("DEFAULT_YAW", "0.0")),
        dry_run=os.getenv("DRY_RUN", "false").lower() in truthy,
        workspace_min=workspace_min,
        workspace_max=workspace_max,
        class_map=_parse_class_map(os.getenv("CLASS_MAP", "")),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def load_api_config() -> ApiConfig:
    """Build an ApiConfig from environment variables, with sane defaults."""
    enabled_flag = os.getenv("API_ENABLED", "true").lower()
    return ApiConfig(
        host=os.getenv("API_HOST", "0.0.0.0"),
        port=int(os.getenv("API_PORT", "8080")),
        enabled=enabled_flag in ("true", "1", "yes"),
    )
|
||||||
501
dora_voice_control/dora_voice_control/main.py
Normal file
501
dora_voice_control/dora_voice_control/main.py
Normal file
@@ -0,0 +1,501 @@
|
|||||||
|
"""Dora node for voice control with safe robot commands."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from collections import deque
|
||||||
|
from typing import Any, Deque, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
|
from dora import Node
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tomllib
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
import tomli as tomllib
|
||||||
|
|
||||||
|
# Handle both package and direct script execution
|
||||||
|
# __package__ is None when run as script, '' when imported from a script
|
||||||
|
_RUNNING_AS_SCRIPT = not __package__
|
||||||
|
|
||||||
|
if _RUNNING_AS_SCRIPT:
|
||||||
|
# Running as script - use absolute imports
|
||||||
|
_pkg_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
if _pkg_dir not in sys.path:
|
||||||
|
sys.path.insert(0, _pkg_dir)
|
||||||
|
from config import VoiceConfig, load_api_config, load_voice_config
|
||||||
|
from parser import normalize, parse_command
|
||||||
|
from state import RobotStep, SharedState
|
||||||
|
from api import start_api_server
|
||||||
|
else:
|
||||||
|
# Running as package - use relative imports
|
||||||
|
from .config import VoiceConfig, load_api_config, load_voice_config
|
||||||
|
from .parser import normalize, parse_command
|
||||||
|
from .state import RobotStep, SharedState
|
||||||
|
from .api import start_api_server
|
||||||
|
|
||||||
|
|
||||||
|
def _within_bounds(
|
||||||
|
point_mm: np.ndarray,
|
||||||
|
min_xyz: Tuple[Optional[float], Optional[float], Optional[float]],
|
||||||
|
max_xyz: Tuple[Optional[float], Optional[float], Optional[float]],
|
||||||
|
) -> bool:
|
||||||
|
"""Check if point is within workspace bounds."""
|
||||||
|
x, y, z = point_mm.tolist()
|
||||||
|
min_x, min_y, min_z = min_xyz
|
||||||
|
max_x, max_y, max_z = max_xyz
|
||||||
|
if min_x is not None and x < min_x:
|
||||||
|
return False
|
||||||
|
if max_x is not None and x > max_x:
|
||||||
|
return False
|
||||||
|
if min_y is not None and y < min_y:
|
||||||
|
return False
|
||||||
|
if max_y is not None and y > max_y:
|
||||||
|
return False
|
||||||
|
if min_z is not None and z < min_z:
|
||||||
|
return False
|
||||||
|
if max_z is not None and z > max_z:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _translate_target(token: str, mapping: Dict[str, str]) -> str:
|
||||||
|
"""Translate object name using class map."""
|
||||||
|
if token in mapping:
|
||||||
|
return mapping[token]
|
||||||
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
def _load_config_file(path: str) -> Dict[str, Any]:
|
||||||
|
"""Load TOML configuration file."""
|
||||||
|
if not path or not os.path.exists(path):
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
with open(path, "rb") as handle:
|
||||||
|
return tomllib.load(handle)
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _load_bucket_objects(config_path: str) -> List[Dict[str, Any]]:
    """Build static "box" objects from bucket positions in the config file.

    Each entry uses the configured (x, y) and a z taken from
    object_parameters.normal_height (default 220.0 mm).
    """
    cfg = _load_config_file(config_path)
    positions = cfg.get("bucket_positions", {})
    params = cfg.get("object_parameters", {})
    drop_z = float(params.get("normal_height", 220.0))

    bucket_colors = (
        ("blue_bucket_pos", "blue"),
        ("red_bucket_pos", "red"),
        ("yellow_bucket_pos", "yellow"),
        ("white_bucket_pos", "white"),
    )
    objects: List[Dict[str, Any]] = []
    for key, color in bucket_colors:
        pos = positions.get(key)
        # Skip entries that are missing or malformed (need at least x and y).
        if not isinstance(pos, list) or len(pos) < 2:
            continue
        objects.append(
            {
                "object_type": "box",
                "color": color,
                "size": "big",
                "position_mm": [float(pos[0]), float(pos[1]), drop_z],
                "source": "config",
            }
        )
    return objects
|
||||||
|
|
||||||
|
|
||||||
|
def _send_dora_command(
    node: Node, output_name: str, action: str, payload: Dict[str, Any]
) -> str:
    """Publish a robot command on *output_name*; return its generated UUID."""
    command_id = str(uuid.uuid4())
    body = json.dumps({"id": command_id, "action": action, "payload": payload})
    node.send_output(
        output_name,
        pa.array([body]),
        metadata={"encoding": "json", "timestamp_ns": time.time_ns()},
    )
    return command_id
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_status_payload(value: pa.Array) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Parse status payload from robot."""
|
||||||
|
if len(value) == 0:
|
||||||
|
return None
|
||||||
|
raw = value[0].as_py()
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return json.loads(raw)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _log(msg: str) -> None:
|
||||||
|
"""Print a timestamped log message."""
|
||||||
|
timestamp = time.strftime("%H:%M:%S")
|
||||||
|
print(f"[voice_control {timestamp}] {msg}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Main entry point for the voice control node.

    Event-loop responsibilities, in order:

    * load voice/API configuration and Dora topic names from the environment,
    * optionally queue a vacuum-off + move-to-home sequence at startup,
    * convert voice transcripts into intents via ``command_handler`` (shared
      by the Dora voice input and the web API callback),
    * consume pose / status / object-detection / camera events, and
    * translate intents into ``RobotStep``s, sending exactly one queued step
      at a time and waiting for the robot's acknowledgement before the next.
    """
    _log("Starting voice control node...")

    # Load configuration
    cfg = load_voice_config()
    api_cfg = load_api_config()

    # Environment variables for I/O topics (defaults must match the dataflow YAML)
    objects_input = os.getenv("OBJECTS_INPUT", "objects")
    voice_in_input = os.getenv("VOICE_IN_INPUT", "voice_in")
    voice_out_output = os.getenv("VOICE_OUT_OUTPUT", "voice_out")
    scene_output = os.getenv("SCENE_OUTPUT", "scene_update")
    pose_input = os.getenv("POSE_INPUT", "tcp_pose")
    status_input = os.getenv("STATUS_INPUT", "status")
    command_output = os.getenv("COMMAND_OUTPUT", "robot_cmd")
    image_input = os.getenv("IMAGE_INPUT", "image_annotated")
    llm_provider = os.getenv("LLM_PROVIDER", "rules").lower()
    config_file = os.getenv("CONFIG_FILE", "config.toml")

    # Image dimensions used to reshape the flat camera buffer.
    # NOTE(review): despite the original "detected from first frame" comment,
    # no detection happens below -- frames are always reshaped to these values.
    image_width = int(os.getenv("IMAGE_WIDTH", "1280"))
    image_height = int(os.getenv("IMAGE_HEIGHT", "720"))

    # Initial/home position for the startup move and the "reiniciar" command
    # (presumably millimetres / degrees -- confirm against the robot node).
    init_x = float(os.getenv("INIT_X", "300.0"))
    init_y = float(os.getenv("INIT_Y", "0.0"))
    init_z = float(os.getenv("INIT_Z", "250.0"))
    init_roll = float(os.getenv("INIT_ROLL", "180.0"))
    init_pitch = float(os.getenv("INIT_PITCH", "0.0"))
    init_yaw = float(os.getenv("INIT_YAW", "0.0"))

    _log(f"Config: tcp_offset={cfg.tcp_offset_mm}mm, approach_offset={cfg.approach_offset_mm}mm, step={cfg.step_mm}mm")
    _log(f"Initial position: [{init_x}, {init_y}, {init_z}]")
    _log(f"LLM provider: {llm_provider}")
    _log(f"Dry run: {cfg.dry_run}")

    # Initialize shared state (also read concurrently by the web API thread)
    shared_state = SharedState()
    state = shared_state.voice_state
    state.static_objects = _load_bucket_objects(config_file)
    # Intents parsed from voice but not yet turned into robot steps.
    pending_intents: Deque[Dict[str, Any]] = deque()

    _log(f"Loaded {len(state.static_objects)} static objects from config")

    # Queue initial position movement on startup (same steps as "reiniciar")
    init_on_start = os.getenv("INIT_ON_START", "true").lower() in ("true", "1", "yes")
    send_init_scene_reset = init_on_start  # Flag to send scene reset after node starts
    if init_on_start:
        _log(f"Startup: resetting scene and moving to home [{init_x}, {init_y}, {init_z}]")
        # Clear detected objects
        state.latest_objects = []
        state.latest_objects_at = None
        # Queue vacuum off and move to home
        state.queue.append(RobotStep(action="vacuum_off", payload={}))
        state.queue.append(
            RobotStep(
                action="move_to_pose",
                payload={
                    "x": init_x,
                    "y": init_y,
                    "z": init_z,
                    "roll": init_roll,
                    "pitch": init_pitch,
                    "yaw": init_yaw,
                },
            )
        )

    def command_handler(transcript: str) -> Dict[str, str]:
        """Parse *transcript* into an intent and enqueue it.

        Returns a short user-facing reply dict with "text" and "status"
        keys; the actual robot motion happens later in the event loop.
        """
        _log(f"Voice input received: \"{transcript}\"")
        llm_result = parse_command(transcript, llm_provider)
        _log(f"Parse result: {llm_result}")

        # Update debug state
        shared_state.update_voice_input(transcript, llm_result, time.monotonic())

        if llm_result.get("resultado") != "ok":
            _log("Command not understood")
            return {"text": "No entendi el comando", "status": "error"}

        action = llm_result.get("accion", "error")
        obj = llm_result.get("objeto", "no especificado")
        color = llm_result.get("color", "no especificado")
        size = llm_result.get("tamano", "no especificado")

        _log(f"Intent: action={action}, object={obj}, color={color}, size={size}")

        pending_intents.append(
            {"action": action, "obj": obj, "color": color, "size": size}
        )

        # Add to history
        shared_state.add_to_history({
            "timestamp": time.time(),
            "input": transcript,
            "action": action,
            "object": obj,
            "color": color,
            "size": size,
        })

        return {"text": f"Ok, voy a {action}", "status": "ok"}

    # Set command callback for web interface
    shared_state.set_command_callback(command_handler)

    # Start web API server if enabled
    if api_cfg.enabled:
        start_api_server(shared_state, api_cfg)

    # Create Dora node
    node = Node()
    _log("Dora node created, waiting for events...")

    first_event = True
    for event in node:
        # Send scene reset on first event (startup)
        if first_event and send_init_scene_reset:
            first_event = False
            scene_payload = json.dumps(
                {"objects": list(state.static_objects), "reset": True}
            )
            node.send_output(
                scene_output,
                pa.array([scene_payload]),
                metadata={"encoding": "json", "timestamp_ns": time.time_ns()},
            )
            _log("Sent initial scene reset notification")
        if event["type"] != "INPUT":
            continue

        # Handle voice input (transcript arriving over Dora)
        if event["id"] == voice_in_input:
            raw = event["value"][0].as_py() if len(event["value"]) else ""
            if not raw:
                continue
            response = command_handler(raw)
            node.send_output(
                voice_out_output,
                pa.array([json.dumps(response)]),
                metadata={"encoding": "json", "timestamp_ns": time.time_ns()},
            )
            continue

        # Handle pose updates (at least 6 values; first three treated as
        # x/y/z below -- presumably [x, y, z, roll, pitch, yaw])
        if event["id"] == pose_input:
            tcp_pose = event["value"].to_numpy().astype(np.float64).reshape(-1)
            if tcp_pose.size >= 6:
                state.latest_pose = tcp_pose[:6].tolist()
                state.latest_pose_at = time.monotonic()
            continue

        # Handle object detection updates (JSON payload with an "objects" list)
        if event["id"] == objects_input:
            raw = event["value"][0].as_py() if len(event["value"]) else ""
            if raw:
                try:
                    payload = json.loads(raw)
                    objects = payload.get("objects", [])
                except Exception:
                    objects = []
                state.latest_objects = objects
                state.latest_objects_at = time.monotonic()
            # NOTE(review): this `continue` makes the scene-update emit for
            # objects_input further below unreachable.
            continue

        # Handle camera image
        if event["id"] == image_input:
            try:
                # Get raw image data
                img_data = event["value"].to_numpy()
                # Reshape to image (assuming BGR format)
                img = img_data.reshape((image_height, image_width, 3)).astype(np.uint8)
                # Encode to JPEG for the web interface
                _, jpeg_data = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 80])
                shared_state.update_image(jpeg_data.tobytes(), time.monotonic())
            except Exception as e:
                # Swallow malformed frames so one bad image cannot kill the
                # node. NOTE(review): `e` is unused and nothing is logged.
                pass
            continue

        # Handle robot status updates (command acknowledgements)
        if event["id"] == status_input:
            payload = _parse_status_payload(event["value"])
            if payload and state.pending_command:
                # Only clear the in-flight command when the ack matches it.
                if payload.get("command_id") == state.pending_command.get("id"):
                    _log(f"Command completed: {state.pending_command.get('action')} (status={payload.get('status', 'ok')})")
                    state.pending_command = None
            continue

        # Process pending intents (reached only by events not handled above,
        # e.g. a timer tick if the dataflow provides one)
        if pending_intents:
            intent = pending_intents.popleft()
            action = intent["action"]
            obj = intent["obj"]
            color = intent["color"]
            size = intent["size"]

            _log(f"Processing intent: {action} {obj} {color} {size}")

            latest_pose = state.latest_pose
            # Candidate targets: live detections first, then config buckets.
            objects = list(state.latest_objects) + list(state.static_objects)
            _log(f"Available objects: {len(state.latest_objects)} detected + {len(state.static_objects)} static")

            if action in ("subir", "bajar") and latest_pose:
                # Relative Z jog from the current pose.
                delta = cfg.step_mm if action == "subir" else -cfg.step_mm
                target = np.array(latest_pose[:3], dtype=np.float64)
                target[2] += delta
                if _within_bounds(target, cfg.workspace_min, cfg.workspace_max):
                    step = RobotStep(
                        action="move_to_pose",
                        payload={
                            "x": float(target[0]),
                            "y": float(target[1]),
                            "z": float(target[2]),
                            "roll": cfg.default_roll,
                            "pitch": cfg.default_pitch,
                            "yaw": cfg.default_yaw,
                        },
                    )
                    state.queue.append(step)
                    _log(f"Queued: move Z to {target[2]:.1f}mm (delta={delta:+.1f})")
                else:
                    _log(f"Target {target.tolist()} out of bounds, skipping")

            elif action in ("ir", "tomar", "soltar"):
                # Find the first object matching type (and color/size if given).
                target_obj = None
                if obj != "no especificado":
                    target_name = _translate_target(obj, cfg.class_map)
                    target_color = _translate_target(color, cfg.class_map)
                    _log(f"Looking for: type={target_name}, color={target_color}")
                    # Log available objects for debugging
                    for o in objects:
                        _log(f"  -> Available: {o.get('object_type')} {o.get('color')} {o.get('size')} at {o.get('position_mm')}")
                    for o in objects:
                        if o.get("object_type") == target_name:
                            if color == "no especificado" or o.get("color") == target_color:
                                if size == "no especificado" or o.get("size") == _translate_target(size, cfg.class_map):
                                    target_obj = o
                                    break
                if target_obj:
                    _log(f"Found target: {target_obj.get('object_type')} {target_obj.get('color')} at {target_obj.get('position_mm')}")
                    pos = np.array(target_obj["position_mm"], dtype=np.float64)
                    # Approach above the object, then descend to the pick pose.
                    approach = pos.copy()
                    approach[2] += cfg.tcp_offset_mm + cfg.approach_offset_mm
                    target = pos.copy()
                    target[2] += cfg.tcp_offset_mm
                    if _within_bounds(approach, cfg.workspace_min, cfg.workspace_max):
                        state.queue.append(
                            RobotStep(
                                action="move_to_pose",
                                payload={
                                    "x": float(approach[0]),
                                    "y": float(approach[1]),
                                    "z": float(approach[2]),
                                    "roll": cfg.default_roll,
                                    "pitch": cfg.default_pitch,
                                    "yaw": cfg.default_yaw,
                                },
                            )
                        )
                        _log(f"Queued: approach pose at Z={approach[2]:.1f}mm")
                    if _within_bounds(target, cfg.workspace_min, cfg.workspace_max):
                        state.queue.append(
                            RobotStep(
                                action="move_to_pose",
                                payload={
                                    "x": float(target[0]),
                                    "y": float(target[1]),
                                    "z": float(target[2]),
                                    "roll": cfg.default_roll,
                                    "pitch": cfg.default_pitch,
                                    "yaw": cfg.default_yaw,
                                },
                            )
                        )
                        _log(f"Queued: target pose at Z={target[2]:.1f}mm")
                    if action == "tomar":
                        state.queue.append(RobotStep(action="vacuum_on", payload={}))
                        _log("Queued: vacuum_on")
                    elif action == "soltar":
                        state.queue.append(RobotStep(action="vacuum_off", payload={}))
                        _log("Queued: vacuum_off")
                else:
                    _log(f"Target object not found: {obj} {color}")
                    continue

            elif action == "reiniciar":
                _log(f"Reiniciar: resetting scene and moving to home [{init_x}, {init_y}, {init_z}]")
                # Turn off vacuum first
                state.queue.append(RobotStep(action="vacuum_off", payload={}))
                # Clear current detected objects (will be refreshed by detector)
                state.latest_objects = []
                state.latest_objects_at = None
                _log("Cleared detected objects - waiting for fresh detection")
                # Move to initial position
                state.queue.append(
                    RobotStep(
                        action="move_to_pose",
                        payload={
                            "x": init_x,
                            "y": init_y,
                            "z": init_z,
                            "roll": init_roll,
                            "pitch": init_pitch,
                            "yaw": init_yaw,
                        },
                    )
                )
                _log(f"Queued: vacuum_off + move to home")
                # Send scene update to notify clients that scene was reset
                scene_payload = json.dumps(
                    {"objects": list(state.static_objects), "reset": True}
                )
                node.send_output(
                    scene_output,
                    pa.array([scene_payload]),
                    metadata={"encoding": "json", "timestamp_ns": time.time_ns()},
                )
                _log("Sent scene reset notification")

            _log(f"Queue size: {len(state.queue)}")

        # Emit scene updates when objects change
        # NOTE(review): unreachable -- the objects_input branch above always
        # ends with `continue`, so event["id"] can never equal objects_input
        # here; scene updates are only emitted at startup and on "reiniciar".
        if event["id"] == objects_input:
            scene_payload = json.dumps(
                {"objects": list(state.latest_objects) + list(state.static_objects)}
            )
            node.send_output(
                scene_output,
                pa.array([scene_payload]),
                metadata={"encoding": "json", "timestamp_ns": time.time_ns()},
            )

        # Send queued robot steps one at a time
        if state.pending_command is None and state.queue:
            step = state.queue.popleft()
            if cfg.dry_run:
                _log(f"[DRY RUN] Would send: {step.action} {step.payload}")
                state.pending_command = None
                continue
            cmd_id = _send_dora_command(node, command_output, step.action, step.payload)
            state.pending_command = {"id": cmd_id, "action": step.action}
            _log(f"Sent command: {step.action} (id={cmd_id[:8]}...) remaining={len(state.queue)}")

            # Update debug state
            shared_state.update_robot_command(
                {"id": cmd_id, "action": step.action, "payload": step.payload},
                time.monotonic(),
            )
|
||||||
|
|
||||||
|
|
||||||
|
# Run the node when executed as a script (e.g. launched by the Dora runtime).
if __name__ == "__main__":
    main()
|
||||||
38
dora_voice_control/dora_voice_control/models.py
Normal file
38
dora_voice_control/dora_voice_control/models.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""Pydantic models for the voice control API."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class CommandRequest(BaseModel):
    """Request to send a voice command."""

    # Transcript text to be interpreted as a voice command.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
class CommandResponse(BaseModel):
    """Response from a voice command."""

    # Whether the command was accepted.
    ok: bool
    # Human-readable reply text.
    text: str
    # Machine-readable outcome, e.g. "ok" or "error".
    status: str
|
||||||
|
|
||||||
|
|
||||||
|
class MoveRequest(BaseModel):
    """Request to move to a position."""

    # Target position (presumably TCP coordinates in millimetres -- confirm
    # against the robot node).
    x: float
    y: float
    z: float
    # Tool orientation; defaults match the node's home pose defaults
    # (roll=180, pitch=0, yaw=0).
    roll: Optional[float] = 180.0
    pitch: Optional[float] = 0.0
    yaw: Optional[float] = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class VacuumRequest(BaseModel):
    """Request to control the vacuum."""

    # True to switch the vacuum on, False to switch it off.
    on: bool
|
||||||
118
dora_voice_control/dora_voice_control/parser.py
Normal file
118
dora_voice_control/dora_voice_control/parser.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
"""Voice command parsing logic."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import unicodedata
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
|
||||||
|
def normalize(text: str) -> str:
    """Normalize text: lowercase, strip whitespace, and remove accents.

    Decomposes the text with NFKD and drops combining marks, so e.g.
    "Súbelo" becomes "subelo" and "pequeño" becomes "pequeno".
    """
    text = text.lower().strip()
    text = unicodedata.normalize("NFKD", text)
    return "".join(c for c in text if not unicodedata.combining(c))


# Keyword tables for the rule-based parser. Matching is deliberately done
# with substring tests (not word tokens) so Spanish imperatives with clitic
# suffixes still match, e.g. "subelo" contains "sube".
# Order matters: the first action whose keyword appears in the text wins.
_ACTION_KEYWORDS = [
    ("reiniciar", ("reiniciar", "reinicia", "reset")),
    ("subir", ("sube", "subir", "arriba")),
    ("bajar", ("baja", "bajar", "abajo")),
    ("soltar", ("soltar", "deja", "dejar")),
    ("tomar", ("tomar", "toma", "agarra", "agarrar", "coger", "chupar", "succionar")),
    ("ir", ("ir", "ve", "mover", "muevete", "acercar")),
]
_COLORS = ("rojo", "azul", "amarillo", "blanco")
_OBJECTS = ("estrella", "cilindro", "cubo", "caja")


def rule_parse(transcript: str) -> Dict[str, str]:
    """Parse a voice command with a rule-based keyword matcher.

    Returns ``{"resultado": "error"}`` when no known action keyword is
    found; otherwise a dict with ``resultado``/``accion``/``objeto``/
    ``color``/``tamano`` keys, using "no especificado" for missing slots.
    """
    text = normalize(transcript)

    action = "error"
    for name, keywords in _ACTION_KEYWORDS:
        if any(w in text for w in keywords):
            action = name
            break

    if action == "error":
        return {"resultado": "error"}

    # First matching color/object wins, preserving the original priority
    # order of the if/elif chains.
    color = next((c for c in _COLORS if c in text), "no especificado")
    obj = next((o for o in _OBJECTS if o in text), "no especificado")

    size = "no especificado"
    if "grande" in text:
        size = "grande"
    elif any(w in text for w in ("pequeno", "pequena", "chico", "chica")):
        # normalize() strips the tilde, so "pequeño"/"pequeña" arrive here as
        # "pequeno"/"pequena" (the original's literal "pequeño" test was
        # unreachable, and the feminine forms were never matched).
        size = "pequeno"

    return {
        "resultado": "ok",
        "accion": action,
        "objeto": obj,
        "color": color,
        "tamano": size,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_gemini_prompt(transcript: str) -> str:
    """Build the Spanish prompt asking Gemini to convert *transcript* into a
    JSON intent with the fields 'resultado', 'accion', 'objeto', 'color' and
    'tamano'.

    The literal text below is part of the model contract (it enumerates the
    allowed actions, colors, sizes and objects) -- do not edit it casually.
    """
    return f"""Interpreta el siguiente comando de voz de un niño, convertido a texto, para controlar
un robot (manito). Asegúrate de responder con 'accion', 'objeto', 'color' y 'tamano'. Si el color
o el tamaño no están especificados, responde con 'no especificado'. Si no entiendes la frase,
responde con 'resultado: error'. En caso contrario, responde con 'resultado: ok'. Las acciones
posibles son 'bajar', 'subir', 'soltar', 'tomar', 'ir', 'reiniciar'. Los colores posibles son 'rojo',
'blanco','azul' y 'amarillo'. Los tamaños posibles son 'grande', 'pequeno'. Los posible objetos son estrella,
cilindro, cubo y caja; cualquier otro objeto es error.
Comando: "{transcript}"
Nota: Los comandos pueden incluir variaciones en la expresión y errores comunes en el lenguaje de
los niños. Normaliza la respuesta a las categorías establecidas. La salida es un json con los campos
'resultado', 'accion', 'objeto', 'color' y 'tamano'. Adicionalmente los ninos pueden decir tomar,chupar, succionar o similar para tomar un objeto.
"""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_command(transcript: str, llm_provider: str = "rules") -> Dict[str, str]:
    """Parse a voice command using the selected provider.

    With ``llm_provider == "gemini"`` the Gemini API is asked for a JSON
    intent, falling back to the rule-based parser whenever the SDK, the
    API key, or the request itself is unavailable. A Gemini reply that is
    not valid JSON yields ``{"resultado": "error"}``. Any other provider
    value uses the rule-based parser directly.
    """
    if llm_provider != "gemini":
        return rule_parse(transcript)

    try:
        from google import genai
        from google.genai import types
    except Exception:
        # SDK not installed -- degrade gracefully to the rule parser.
        return rule_parse(transcript)

    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        return rule_parse(transcript)

    try:
        client = genai.Client(api_key=api_key)
        reply = client.models.generate_content(
            model=os.getenv("GEMINI_MODEL", "gemini-2.0-flash"),
            contents=build_gemini_prompt(transcript),
            config=types.GenerateContentConfig(temperature=0.5),
        )
        # Strip markdown code fences the model sometimes adds.
        cleaned = str(reply.text).replace("```json", "").replace("```", "")
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Gemini answered, but not with parseable JSON.
        return {"resultado": "error"}
    except Exception:
        return rule_parse(transcript)
|
||||||
158
dora_voice_control/dora_voice_control/state.py
Normal file
158
dora_voice_control/dora_voice_control/state.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
"""Shared state management for voice control node."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
from collections import deque
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Deque, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RobotStep:
    """A single step in the robot command queue."""

    # Command name understood by the robot node, e.g. "move_to_pose",
    # "vacuum_on", "vacuum_off".
    action: str
    # JSON-serializable arguments for the action (may be empty).
    payload: Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class VoiceState:
    """Runtime state for voice control."""

    # Last TCP pose reported by the robot (presumably
    # [x, y, z, roll, pitch, yaw] -- confirm against the robot node),
    # or None before the first report.
    latest_pose: Optional[List[float]] = None
    # time.monotonic() timestamp of the last pose update.
    latest_pose_at: Optional[float] = None
    # Most recent objects reported by the detector.
    latest_objects: List[Dict[str, Any]] = field(default_factory=list)
    # time.monotonic() timestamp of the last detection update.
    latest_objects_at: Optional[float] = None
    # Fixed objects loaded from configuration (e.g. bucket positions).
    static_objects: List[Dict[str, Any]] = field(default_factory=list)
    # Command currently awaiting acknowledgement from the robot, if any.
    pending_command: Optional[Dict[str, Any]] = None
    # Robot steps queued for sequential execution.
    queue: Deque[RobotStep] = field(default_factory=deque)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DebugState:
    """Debug information for the web interface."""

    # Most recent raw voice transcript and its time.monotonic() timestamp.
    last_voice_input: Optional[str] = None
    last_voice_input_at: Optional[float] = None
    # Parser output for the last transcript.
    last_parse_result: Optional[Dict[str, Any]] = None
    # Last command sent to the robot and its time.monotonic() timestamp.
    last_robot_command: Optional[Dict[str, Any]] = None
    last_robot_command_at: Optional[float] = None
    # Rolling histories; SharedState trims both to 100 entries.
    command_history: List[Dict[str, Any]] = field(default_factory=list)
    error_log: List[Dict[str, Any]] = field(default_factory=list)
    # Latest encoded camera frame and its time.monotonic() timestamp.
    latest_image: Optional[bytes] = None
    latest_image_at: Optional[float] = None
|
||||||
|
|
||||||
|
|
||||||
|
class SharedState:
    """Thread-safe shared state container.

    All access to the voice and debug state from the Dora loop and the
    web API threads goes through methods that hold a single lock.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self.voice_state = VoiceState()
        self.debug_state = DebugState()
        self._command_callback: Optional[Any] = None

    def set_command_callback(self, callback: Any) -> None:
        """Register the callable the web interface uses to submit commands."""
        with self._lock:
            self._command_callback = callback

    def get_command_callback(self) -> Optional[Any]:
        """Return the registered command callback, or None."""
        with self._lock:
            return self._command_callback

    def get_status(self) -> Dict[str, Any]:
        """Snapshot of pose/object/queue/voice state for the web interface."""
        with self._lock:
            voice = self.voice_state
            debug = self.debug_state
            return {
                "has_pose": voice.latest_pose is not None,
                "pose": voice.latest_pose,
                "pose_age_ms": _age_ms(voice.latest_pose_at),
                "object_count": len(voice.latest_objects),
                "static_object_count": len(voice.static_objects),
                "queue_size": len(voice.queue),
                "has_pending_command": voice.pending_command is not None,
                "pending_command": voice.pending_command,
                "last_voice_input": debug.last_voice_input,
                "last_voice_input_age_ms": _age_ms(debug.last_voice_input_at),
                "last_parse_result": debug.last_parse_result,
            }

    def get_objects(self) -> Dict[str, Any]:
        """Copies of the detected and static object lists."""
        with self._lock:
            voice = self.voice_state
            return {
                "detected": list(voice.latest_objects),
                "static": list(voice.static_objects),
            }

    def get_queue(self) -> List[Dict[str, Any]]:
        """The pending robot steps rendered as plain dicts."""
        with self._lock:
            steps = self.voice_state.queue
            return [{"action": step.action, "payload": step.payload} for step in steps]

    def get_history(self) -> List[Dict[str, Any]]:
        """Up to the 50 most recent command-history entries."""
        with self._lock:
            return self.debug_state.command_history[-50:]

    def get_errors(self) -> List[Dict[str, Any]]:
        """Up to the 50 most recent error-log entries."""
        with self._lock:
            return self.debug_state.error_log[-50:]

    def add_to_history(self, entry: Dict[str, Any]) -> None:
        """Append *entry* to the command history, keeping at most 100 items."""
        with self._lock:
            history = self.debug_state.command_history
            history.append(entry)
            if len(history) > 100:
                self.debug_state.command_history = history[-100:]

    def add_error(self, error: Dict[str, Any]) -> None:
        """Append *error* to the error log, keeping at most 100 items."""
        with self._lock:
            log = self.debug_state.error_log
            log.append(error)
            if len(log) > 100:
                self.debug_state.error_log = log[-100:]

    def update_voice_input(self, text: str, parse_result: Dict[str, Any], timestamp: float) -> None:
        """Record the latest transcript and its parse result."""
        with self._lock:
            debug = self.debug_state
            debug.last_voice_input = text
            debug.last_voice_input_at = timestamp
            debug.last_parse_result = parse_result

    def update_robot_command(self, command: Dict[str, Any], timestamp: float) -> None:
        """Record the last command dispatched to the robot."""
        with self._lock:
            debug = self.debug_state
            debug.last_robot_command = command
            debug.last_robot_command_at = timestamp

    def update_image(self, image_bytes: bytes, timestamp: float) -> None:
        """Store the newest encoded camera frame."""
        with self._lock:
            debug = self.debug_state
            debug.latest_image = image_bytes
            debug.latest_image_at = timestamp

    def get_image(self) -> Optional[bytes]:
        """Return the newest encoded camera frame, if any."""
        with self._lock:
            return self.debug_state.latest_image

    def get_image_age_ms(self) -> Optional[int]:
        """Age of the newest camera frame in milliseconds, or None."""
        with self._lock:
            return _age_ms(self.debug_state.latest_image_at)
|
||||||
|
|
||||||
|
|
||||||
|
def _age_ms(timestamp: Optional[float]) -> Optional[int]:
|
||||||
|
"""Calculate age in milliseconds from monotonic timestamp."""
|
||||||
|
import time
|
||||||
|
|
||||||
|
if timestamp is None:
|
||||||
|
return None
|
||||||
|
return int((time.monotonic() - timestamp) * 1000)
|
||||||
700
dora_voice_control/dora_voice_control/templates.py
Normal file
700
dora_voice_control/dora_voice_control/templates.py
Normal file
@@ -0,0 +1,700 @@
|
|||||||
|
"""HTML templates for the voice control web interface."""
|
||||||
|
|
||||||
|
HTML_TEMPLATE = """<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Voice Control Debug</title>
|
||||||
|
<style>
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body {
|
||||||
|
font-family: 'Segoe UI', system-ui, sans-serif;
|
||||||
|
background: #1a1a2e;
|
||||||
|
color: #eee;
|
||||||
|
min-height: 100vh;
|
||||||
|
padding: 20px;
|
||||||
|
}
|
||||||
|
.header {
|
||||||
|
text-align: center;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
padding-bottom: 15px;
|
||||||
|
border-bottom: 1px solid #333;
|
||||||
|
}
|
||||||
|
.header h1 { color: #00d4ff; font-size: 1.5em; }
|
||||||
|
.header .status {
|
||||||
|
margin-top: 8px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
.status-dot {
|
||||||
|
display: inline-block;
|
||||||
|
width: 10px;
|
||||||
|
height: 10px;
|
||||||
|
border-radius: 50%;
|
||||||
|
margin-right: 6px;
|
||||||
|
}
|
||||||
|
.status-dot.ok { background: #00ff88; }
|
||||||
|
.status-dot.warn { background: #ffaa00; }
|
||||||
|
.status-dot.error { background: #ff4444; }
|
||||||
|
|
||||||
|
.grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
|
||||||
|
gap: 15px;
|
||||||
|
max-width: 1400px;
|
||||||
|
margin: 0 auto;
|
||||||
|
}
|
||||||
|
.card {
|
||||||
|
background: #16213e;
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 15px;
|
||||||
|
border: 1px solid #0f3460;
|
||||||
|
}
|
||||||
|
.card h2 {
|
||||||
|
color: #00d4ff;
|
||||||
|
font-size: 1em;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
padding-bottom: 8px;
|
||||||
|
border-bottom: 1px solid #0f3460;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Command Input */
|
||||||
|
.command-form {
|
||||||
|
display: flex;
|
||||||
|
gap: 10px;
|
||||||
|
margin-bottom: 15px;
|
||||||
|
}
|
||||||
|
.command-form input {
|
||||||
|
flex: 1;
|
||||||
|
padding: 10px 12px;
|
||||||
|
border: 1px solid #0f3460;
|
||||||
|
border-radius: 6px;
|
||||||
|
background: #1a1a2e;
|
||||||
|
color: #fff;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
.command-form input:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: #00d4ff;
|
||||||
|
}
|
||||||
|
.btn {
|
||||||
|
padding: 10px 20px;
|
||||||
|
border: none;
|
||||||
|
border-radius: 6px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-weight: 500;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
.btn-primary {
|
||||||
|
background: #00d4ff;
|
||||||
|
color: #000;
|
||||||
|
}
|
||||||
|
.btn-primary:hover { background: #00b8e0; }
|
||||||
|
.btn-primary:disabled { background: #555; color: #888; cursor: not-allowed; }
|
||||||
|
.btn-secondary {
|
||||||
|
background: #333;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
.btn-secondary:hover { background: #444; }
|
||||||
|
.btn-danger {
|
||||||
|
background: #ff4444;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
.btn-danger:hover { background: #cc3333; }
|
||||||
|
.btn-success {
|
||||||
|
background: #00ff88;
|
||||||
|
color: #000;
|
||||||
|
}
|
||||||
|
.btn-success:hover { background: #00cc6a; }
|
||||||
|
|
||||||
|
/* Quick Commands */
|
||||||
|
.quick-commands {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
.quick-btn {
|
||||||
|
padding: 8px 14px;
|
||||||
|
background: #0f3460;
|
||||||
|
border: 1px solid #1a4a7a;
|
||||||
|
border-radius: 20px;
|
||||||
|
color: #00d4ff;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 13px;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
.quick-btn:hover {
|
||||||
|
background: #1a4a7a;
|
||||||
|
border-color: #00d4ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Status Grid */
|
||||||
|
.status-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 1fr;
|
||||||
|
gap: 10px;
|
||||||
|
}
|
||||||
|
.status-item {
|
||||||
|
background: #1a1a2e;
|
||||||
|
padding: 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
}
|
||||||
|
.status-item .label {
|
||||||
|
font-size: 11px;
|
||||||
|
color: #888;
|
||||||
|
text-transform: uppercase;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.status-item .value {
|
||||||
|
font-size: 14px;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
.status-item .value.ok { color: #00ff88; }
|
||||||
|
.status-item .value.warn { color: #ffaa00; }
|
||||||
|
.status-item .value.error { color: #ff4444; }
|
||||||
|
|
||||||
|
/* Pose Display */
|
||||||
|
.pose-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(3, 1fr);
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
.pose-item {
|
||||||
|
background: #1a1a2e;
|
||||||
|
padding: 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
.pose-item .label {
|
||||||
|
font-size: 11px;
|
||||||
|
color: #888;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.pose-item .value {
|
||||||
|
font-size: 16px;
|
||||||
|
font-weight: 600;
|
||||||
|
color: #00d4ff;
|
||||||
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Objects List */
|
||||||
|
.objects-list {
|
||||||
|
max-height: 300px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
.object-item {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
padding: 8px 10px;
|
||||||
|
background: #1a1a2e;
|
||||||
|
border-radius: 6px;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
.object-item .type { color: #00d4ff; font-weight: 500; }
|
||||||
|
.object-item .color-badge {
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 10px;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
.color-badge.red { background: #ff4444; color: #fff; }
|
||||||
|
.color-badge.blue { background: #4488ff; color: #fff; }
|
||||||
|
.color-badge.yellow { background: #ffcc00; color: #000; }
|
||||||
|
.color-badge.white { background: #fff; color: #000; }
|
||||||
|
.object-item .pos {
|
||||||
|
font-family: monospace;
|
||||||
|
font-size: 11px;
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Queue Display */
|
||||||
|
.queue-list {
|
||||||
|
max-height: 150px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
.queue-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 10px;
|
||||||
|
padding: 8px 10px;
|
||||||
|
background: #1a1a2e;
|
||||||
|
border-radius: 6px;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
.queue-item .index {
|
||||||
|
background: #0f3460;
|
||||||
|
color: #00d4ff;
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
.queue-item .action { color: #00ff88; font-weight: 500; }
|
||||||
|
.queue-item.pending { border-left: 3px solid #ffaa00; }
|
||||||
|
|
||||||
|
/* Log Display */
|
||||||
|
.log {
|
||||||
|
max-height: 250px;
|
||||||
|
overflow-y: auto;
|
||||||
|
font-family: monospace;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
.log-entry {
|
||||||
|
padding: 6px 10px;
|
||||||
|
border-bottom: 1px solid #0f3460;
|
||||||
|
}
|
||||||
|
.log-entry:last-child { border-bottom: none; }
|
||||||
|
.log-entry .time {
|
||||||
|
color: #666;
|
||||||
|
margin-right: 10px;
|
||||||
|
}
|
||||||
|
.log-entry.error { color: #ff4444; }
|
||||||
|
.log-entry.success { color: #00ff88; }
|
||||||
|
.log-entry.info { color: #00d4ff; }
|
||||||
|
|
||||||
|
/* Parse Result */
|
||||||
|
.parse-result {
|
||||||
|
background: #1a1a2e;
|
||||||
|
padding: 12px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-family: monospace;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
.parse-result .field {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 4px 0;
|
||||||
|
border-bottom: 1px solid #0f3460;
|
||||||
|
}
|
||||||
|
.parse-result .field:last-child { border-bottom: none; }
|
||||||
|
.parse-result .key { color: #888; }
|
||||||
|
.parse-result .val { color: #00d4ff; }
|
||||||
|
|
||||||
|
/* Empty State */
|
||||||
|
.empty {
|
||||||
|
text-align: center;
|
||||||
|
color: #666;
|
||||||
|
padding: 20px;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Camera View */
|
||||||
|
.camera-container {
|
||||||
|
position: relative;
|
||||||
|
background: #000;
|
||||||
|
border-radius: 6px;
|
||||||
|
overflow: hidden;
|
||||||
|
min-height: 240px;
|
||||||
|
}
|
||||||
|
.camera-container img {
|
||||||
|
width: 100%;
|
||||||
|
height: auto;
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
.camera-overlay {
|
||||||
|
position: absolute;
|
||||||
|
top: 10px;
|
||||||
|
right: 10px;
|
||||||
|
background: rgba(0,0,0,0.6);
|
||||||
|
padding: 4px 8px;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
.camera-overlay.ok { color: #00ff88; }
|
||||||
|
.camera-overlay.stale { color: #ffaa00; }
|
||||||
|
.camera-overlay.error { color: #ff4444; }
|
||||||
|
.no-image {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
height: 240px;
|
||||||
|
color: #666;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="header">
|
||||||
|
<h1>Voice Control Debug Interface</h1>
|
||||||
|
<div class="status">
|
||||||
|
<span class="status-dot" id="status-dot"></span>
|
||||||
|
<span id="status-text">Connecting...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="grid">
|
||||||
|
<!-- Camera View -->
|
||||||
|
<div class="card" style="grid-column: span 2;">
|
||||||
|
<h2>Camera View <span id="camera-status" style="font-weight:normal;font-size:11px;color:#888;"></span></h2>
|
||||||
|
<div class="camera-container" id="camera-container">
|
||||||
|
<div class="no-image" id="no-image">No camera image available</div>
|
||||||
|
<img id="camera-img" style="display:none;" alt="Camera feed">
|
||||||
|
<div class="camera-overlay" id="camera-overlay"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Command Input -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>Send Command</h2>
|
||||||
|
<form class="command-form" id="command-form">
|
||||||
|
<input type="text" id="command-input" placeholder="Enter voice command (e.g., 'sube', 'agarra el cubo rojo')" autocomplete="off">
|
||||||
|
<button type="submit" class="btn btn-primary" id="btn-send">Send</button>
|
||||||
|
</form>
|
||||||
|
<div class="quick-commands">
|
||||||
|
<button class="quick-btn" onclick="sendQuick('sube')">Sube</button>
|
||||||
|
<button class="quick-btn" onclick="sendQuick('baja')">Baja</button>
|
||||||
|
<button class="quick-btn" onclick="sendQuick('agarra el cubo rojo')">Cubo Rojo</button>
|
||||||
|
<button class="quick-btn" onclick="sendQuick('agarra el cubo azul')">Cubo Azul</button>
|
||||||
|
<button class="quick-btn" onclick="sendQuick('suelta')">Soltar</button>
|
||||||
|
<button class="quick-btn" onclick="sendQuick('reinicia')">Reiniciar</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Status -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>System Status</h2>
|
||||||
|
<div class="status-grid">
|
||||||
|
<div class="status-item">
|
||||||
|
<div class="label">Pose Available</div>
|
||||||
|
<div class="value" id="st-pose">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="status-item">
|
||||||
|
<div class="label">Pose Age</div>
|
||||||
|
<div class="value" id="st-pose-age">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="status-item">
|
||||||
|
<div class="label">Objects Detected</div>
|
||||||
|
<div class="value" id="st-objects">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="status-item">
|
||||||
|
<div class="label">Static Objects</div>
|
||||||
|
<div class="value" id="st-static">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="status-item">
|
||||||
|
<div class="label">Queue Size</div>
|
||||||
|
<div class="value" id="st-queue">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="status-item">
|
||||||
|
<div class="label">Pending Command</div>
|
||||||
|
<div class="value" id="st-pending">--</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- TCP Pose -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>TCP Pose</h2>
|
||||||
|
<div class="pose-grid">
|
||||||
|
<div class="pose-item">
|
||||||
|
<div class="label">X (mm)</div>
|
||||||
|
<div class="value" id="pose-x">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="pose-item">
|
||||||
|
<div class="label">Y (mm)</div>
|
||||||
|
<div class="value" id="pose-y">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="pose-item">
|
||||||
|
<div class="label">Z (mm)</div>
|
||||||
|
<div class="value" id="pose-z">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="pose-item">
|
||||||
|
<div class="label">Roll</div>
|
||||||
|
<div class="value" id="pose-roll">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="pose-item">
|
||||||
|
<div class="label">Pitch</div>
|
||||||
|
<div class="value" id="pose-pitch">--</div>
|
||||||
|
</div>
|
||||||
|
<div class="pose-item">
|
||||||
|
<div class="label">Yaw</div>
|
||||||
|
<div class="value" id="pose-yaw">--</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Last Parse Result -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>Last Parse Result</h2>
|
||||||
|
<div class="parse-result" id="parse-result">
|
||||||
|
<div class="empty">No command parsed yet</div>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top: 10px; font-size: 12px; color: #888;">
|
||||||
|
<span>Last input: </span><span id="last-input">--</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Detected Objects -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>Detected Objects</h2>
|
||||||
|
<div class="objects-list" id="objects-list">
|
||||||
|
<div class="empty">No objects detected</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Command Queue -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>Command Queue</h2>
|
||||||
|
<div class="queue-list" id="queue-list">
|
||||||
|
<div class="empty">Queue is empty</div>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top: 10px;">
|
||||||
|
<button class="btn btn-danger btn-sm" onclick="clearQueue()">Clear Queue</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Log -->
|
||||||
|
<div class="card" style="grid-column: span 2;">
|
||||||
|
<h2>Activity Log</h2>
|
||||||
|
<div class="log" id="log"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const $ = id => document.getElementById(id);
|
||||||
|
|
||||||
|
async function fetchJson(url, opts = {}) {
|
||||||
|
try {
|
||||||
|
const res = await fetch(url, opts);
|
||||||
|
return await res.json();
|
||||||
|
} catch (e) {
|
||||||
|
return { error: e.message };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function log(msg, type = 'info') {
|
||||||
|
const logEl = $('log');
|
||||||
|
const time = new Date().toLocaleTimeString();
|
||||||
|
const entry = document.createElement('div');
|
||||||
|
entry.className = 'log-entry ' + type;
|
||||||
|
entry.innerHTML = '<span class="time">' + time + '</span>' + msg;
|
||||||
|
logEl.insertBefore(entry, logEl.firstChild);
|
||||||
|
if (logEl.children.length > 100) logEl.removeChild(logEl.lastChild);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function updateStatus() {
|
||||||
|
const data = await fetchJson('/api/status');
|
||||||
|
if (data.error) {
|
||||||
|
$('status-dot').className = 'status-dot error';
|
||||||
|
$('status-text').textContent = 'Error: ' + data.error;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
$('status-dot').className = 'status-dot ok';
|
||||||
|
$('status-text').textContent = 'Connected';
|
||||||
|
|
||||||
|
$('st-pose').textContent = data.has_pose ? 'Yes' : 'No';
|
||||||
|
$('st-pose').className = 'value ' + (data.has_pose ? 'ok' : 'warn');
|
||||||
|
|
||||||
|
$('st-pose-age').textContent = data.pose_age_ms !== null ? data.pose_age_ms + 'ms' : '--';
|
||||||
|
$('st-pose-age').className = 'value ' + (data.pose_age_ms < 1000 ? 'ok' : 'warn');
|
||||||
|
|
||||||
|
$('st-objects').textContent = data.object_count;
|
||||||
|
$('st-static').textContent = data.static_object_count;
|
||||||
|
$('st-queue').textContent = data.queue_size;
|
||||||
|
$('st-queue').className = 'value ' + (data.queue_size > 0 ? 'warn' : 'ok');
|
||||||
|
|
||||||
|
$('st-pending').textContent = data.has_pending_command ? 'Yes' : 'No';
|
||||||
|
$('st-pending').className = 'value ' + (data.has_pending_command ? 'warn' : 'ok');
|
||||||
|
|
||||||
|
// Update pose
|
||||||
|
if (data.pose) {
|
||||||
|
$('pose-x').textContent = data.pose[0].toFixed(1);
|
||||||
|
$('pose-y').textContent = data.pose[1].toFixed(1);
|
||||||
|
$('pose-z').textContent = data.pose[2].toFixed(1);
|
||||||
|
$('pose-roll').textContent = data.pose[3].toFixed(1);
|
||||||
|
$('pose-pitch').textContent = data.pose[4].toFixed(1);
|
||||||
|
$('pose-yaw').textContent = data.pose[5].toFixed(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update last input
|
||||||
|
$('last-input').textContent = data.last_voice_input || '--';
|
||||||
|
|
||||||
|
// Update parse result
|
||||||
|
if (data.last_parse_result) {
|
||||||
|
let html = '';
|
||||||
|
for (const [k, v] of Object.entries(data.last_parse_result)) {
|
||||||
|
html += '<div class="field"><span class="key">' + k + '</span><span class="val">' + v + '</span></div>';
|
||||||
|
}
|
||||||
|
$('parse-result').innerHTML = html;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function updateObjects() {
|
||||||
|
const data = await fetchJson('/api/objects');
|
||||||
|
if (data.error) return;
|
||||||
|
|
||||||
|
const list = $('objects-list');
|
||||||
|
const detected = data.detected || [];
|
||||||
|
const staticObjs = data.static || [];
|
||||||
|
|
||||||
|
if (detected.length === 0 && staticObjs.length === 0) {
|
||||||
|
list.innerHTML = '<div class="empty">No objects detected</div>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = '';
|
||||||
|
|
||||||
|
// Detected objects
|
||||||
|
if (detected.length > 0) {
|
||||||
|
html += '<div style="font-size:11px;color:#00d4ff;margin-bottom:6px;">Detected (' + detected.length + ')</div>';
|
||||||
|
html += detected.map(obj => {
|
||||||
|
const pos = obj.position_mm ? obj.position_mm.map(v => v.toFixed(0)).join(', ') : '--';
|
||||||
|
const colorClass = obj.color || 'white';
|
||||||
|
const conf = obj.confidence ? (obj.confidence * 100).toFixed(0) + '%' : '';
|
||||||
|
const size = obj.size || '';
|
||||||
|
return '<div class="object-item">' +
|
||||||
|
'<span class="type">' + (obj.object_type || '?') + '</span>' +
|
||||||
|
'<span class="color-badge ' + colorClass + '">' + (obj.color || '?') + '</span>' +
|
||||||
|
'<span style="color:#888;font-size:10px;">' + size + '</span>' +
|
||||||
|
'<span style="color:#00ff88;font-size:10px;">' + conf + '</span>' +
|
||||||
|
'<span class="pos">[' + pos + ']</span>' +
|
||||||
|
'</div>';
|
||||||
|
}).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Static objects
|
||||||
|
if (staticObjs.length > 0) {
|
||||||
|
html += '<div style="font-size:11px;color:#888;margin:8px 0 6px 0;">Static (' + staticObjs.length + ')</div>';
|
||||||
|
html += staticObjs.map(obj => {
|
||||||
|
const pos = obj.position_mm ? obj.position_mm.map(v => v.toFixed(0)).join(', ') : '--';
|
||||||
|
const colorClass = obj.color || 'white';
|
||||||
|
return '<div class="object-item" style="opacity:0.7;">' +
|
||||||
|
'<span class="type">' + (obj.object_type || '?') + '</span>' +
|
||||||
|
'<span class="color-badge ' + colorClass + '">' + (obj.color || '?') + '</span>' +
|
||||||
|
'<span class="pos">[' + pos + ']</span>' +
|
||||||
|
'</div>';
|
||||||
|
}).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
list.innerHTML = html;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function updateQueue() {
|
||||||
|
const data = await fetchJson('/api/queue');
|
||||||
|
if (data.error) return;
|
||||||
|
|
||||||
|
const list = $('queue-list');
|
||||||
|
if (!data.length) {
|
||||||
|
list.innerHTML = '<div class="empty">Queue is empty</div>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
list.innerHTML = data.map((item, i) => {
|
||||||
|
const payload = JSON.stringify(item.payload || {});
|
||||||
|
return '<div class="queue-item">' +
|
||||||
|
'<span class="index">' + (i + 1) + '</span>' +
|
||||||
|
'<span class="action">' + item.action + '</span>' +
|
||||||
|
'<span style="color:#888;font-size:11px">' + payload + '</span>' +
|
||||||
|
'</div>';
|
||||||
|
}).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
async function sendCommand(text) {
|
||||||
|
if (!text.trim()) return;
|
||||||
|
|
||||||
|
$('btn-send').disabled = true;
|
||||||
|
log('Sending: "' + text + '"', 'info');
|
||||||
|
|
||||||
|
const res = await fetchJson('/api/command', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ text: text })
|
||||||
|
});
|
||||||
|
|
||||||
|
$('btn-send').disabled = false;
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
log('Response: ' + res.text, 'success');
|
||||||
|
} else {
|
||||||
|
log('Error: ' + (res.text || res.detail || 'Unknown error'), 'error');
|
||||||
|
}
|
||||||
|
|
||||||
|
$('command-input').value = '';
|
||||||
|
updateStatus();
|
||||||
|
updateQueue();
|
||||||
|
}
|
||||||
|
|
||||||
|
function sendQuick(text) {
|
||||||
|
$('command-input').value = text;
|
||||||
|
sendCommand(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function clearQueue() {
|
||||||
|
log('Clearing queue...', 'info');
|
||||||
|
const res = await fetchJson('/api/queue/clear', { method: 'POST' });
|
||||||
|
if (res.ok) {
|
||||||
|
log('Queue cleared', 'success');
|
||||||
|
} else {
|
||||||
|
log('Failed to clear queue', 'error');
|
||||||
|
}
|
||||||
|
updateQueue();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Form submit
|
||||||
|
$('command-form').addEventListener('submit', e => {
|
||||||
|
e.preventDefault();
|
||||||
|
sendCommand($('command-input').value);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Camera update
|
||||||
|
let cameraErrorCount = 0;
|
||||||
|
async function updateCamera() {
|
||||||
|
const info = await fetchJson('/api/image/info');
|
||||||
|
const overlay = $('camera-overlay');
|
||||||
|
const img = $('camera-img');
|
||||||
|
const noImage = $('no-image');
|
||||||
|
const status = $('camera-status');
|
||||||
|
|
||||||
|
if (info.error || !info.has_image) {
|
||||||
|
cameraErrorCount++;
|
||||||
|
if (cameraErrorCount > 3) {
|
||||||
|
img.style.display = 'none';
|
||||||
|
noImage.style.display = 'flex';
|
||||||
|
overlay.textContent = '';
|
||||||
|
status.textContent = '(no feed)';
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
cameraErrorCount = 0;
|
||||||
|
noImage.style.display = 'none';
|
||||||
|
img.style.display = 'block';
|
||||||
|
|
||||||
|
// Update image with cache-busting
|
||||||
|
const newSrc = '/api/image?t=' + Date.now();
|
||||||
|
if (img.src !== newSrc) {
|
||||||
|
img.src = newSrc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update overlay
|
||||||
|
const ageMs = info.age_ms || 0;
|
||||||
|
if (ageMs < 500) {
|
||||||
|
overlay.textContent = 'LIVE';
|
||||||
|
overlay.className = 'camera-overlay ok';
|
||||||
|
} else if (ageMs < 2000) {
|
||||||
|
overlay.textContent = ageMs + 'ms';
|
||||||
|
overlay.className = 'camera-overlay stale';
|
||||||
|
} else {
|
||||||
|
overlay.textContent = 'STALE ' + (ageMs/1000).toFixed(1) + 's';
|
||||||
|
overlay.className = 'camera-overlay error';
|
||||||
|
}
|
||||||
|
status.textContent = '';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Auto-refresh
|
||||||
|
setInterval(updateStatus, 500);
|
||||||
|
setInterval(updateObjects, 1000);
|
||||||
|
setInterval(updateQueue, 500);
|
||||||
|
setInterval(updateCamera, 100);
|
||||||
|
|
||||||
|
// Initial load
|
||||||
|
updateStatus();
|
||||||
|
updateObjects();
|
||||||
|
updateQueue();
|
||||||
|
updateCamera();
|
||||||
|
log('Interface loaded', 'info');
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
25
dora_voice_control/pyproject.toml
Normal file
25
dora_voice_control/pyproject.toml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
[project]
|
||||||
|
name = "dora-voice-control"
|
||||||
|
version = "0.1.0"
|
||||||
|
license = { text = "MIT" }
|
||||||
|
authors = [{ name = "Dora" }]
|
||||||
|
description = "Dora node for voice command control via WebSocket"
|
||||||
|
|
||||||
|
requires-python = ">=3.8"
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"dora-rs >= 0.3.9",
|
||||||
|
"numpy < 2.0.0",
|
||||||
|
"pyarrow >= 12.0.0",
|
||||||
|
"websockets >= 12.0",
|
||||||
|
"fastapi >= 0.109.0",
|
||||||
|
"uvicorn >= 0.27.0",
|
||||||
|
"pydantic >= 2.0.0",
|
||||||
|
"opencv-python >= 4.8.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
llm = ["google-genai"]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
dora-voice-control = "dora_voice_control.main:main"
|
||||||
BIN
trained_models/yolo8n.pt
Normal file
BIN
trained_models/yolo8n.pt
Normal file
Binary file not shown.
Reference in New Issue
Block a user