Work putting one object over the other

2026-02-03 10:31:10 -03:00
parent 10e6792217
commit 45bbdcb9f5
9 changed files with 275 additions and 44 deletions
--- a/dora_voice_control/dora_voice_control/core/behavior.py
+++ b/dora_voice_control/dora_voice_control/core/behavior.py
@@ -129,8 +129,9 @@ class RobotBehavior(ABC):

    def _init_dspy(self, llm_config: LLMConfig) -> None:
        """Initialize DSPy predictor for this behavior."""
+        _log(f"Initializing DSPy with provider={llm_config.provider}, model={llm_config.model}")
        if not DSPY_AVAILABLE:
-            _log("DSPy not available, falling back to rules")
+            _log("DSPy not available (import failed), falling back to rules")
            return
        if self.CommandSignature is None:
            _log("No CommandSignature defined, falling back to rules")
@@ -141,8 +142,12 @@ class RobotBehavior(ABC):
                dspy.configure(lm=lm)
                self._predictor = dspy.Predict(self.CommandSignature)
                _log(f"DSPy initialized with {llm_config.provider}/{llm_config.model}")
+            else:
+                _log(f"Failed to create LM for provider={llm_config.provider}")
        except Exception as e:
+            import traceback
            _log(f"Failed to initialize DSPy: {e}")
+            _log(f"Traceback: {traceback.format_exc()}")

    def _create_lm(self, config: LLMConfig) -> Optional[Any]:
        """Create DSPy language model."""
@@ -171,8 +176,10 @@ class RobotBehavior(ABC):
    def parse_command(self, transcript: str) -> Dict[str, str]:
        """Parse voice command using DSPy or fallback to rules."""
        if self._predictor:
+            _log(f"Using DSPy to parse: '{transcript}'")
            try:
                result = self._predictor(comando=transcript)
+                _log(f"DSPy result: accion={result.accion}, objeto={result.objeto}, color={result.color}, tamano={result.tamano}")
                return {
                    "resultado": "ok" if result.accion != "error" else "error",
                    "accion": result.accion,
@@ -181,7 +188,11 @@ class RobotBehavior(ABC):
                    "tamano": result.tamano,
                }
            except Exception as e:
+                import traceback
                _log(f"DSPy parsing failed: {e}, falling back to rules")
+                _log(f"Traceback: {traceback.format_exc()}")
+        else:
+            _log(f"No DSPy predictor, using rules to parse: '{transcript}'")
        return self.rule_parse(transcript)

    def rule_parse(self, transcript: str) -> Dict[str, str]:
--- a/dora_voice_control/dora_voice_control/core/scene.py
+++ b/dora_voice_control/dora_voice_control/core/scene.py
@@ -73,6 +73,10 @@ class SceneState:
    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._objects: Dict[str, SceneObject] = {}
+        # Scene update mode from SCENE_UPDATE_MODE: "static" (default); any other value, e.g. "dynamic", is treated as non-static
+        self._update_mode: str = os.getenv("SCENE_UPDATE_MODE", "static").lower()
+        # Whether scene has been captured (STATIC mode only)
+        self._scene_captured: bool = False

    # === Core Operations ===

@@ -146,6 +150,8 @@ class SceneState:
            for obj in self._objects.values():
                if obj.on_top_of and obj.on_top_of not in self._objects:
                    obj.on_top_of = None
+            # Reset capture flag to allow next detection
+            self._scene_captured = False

    # === Query ===

@@ -176,6 +182,22 @@ class SceneState:
        with self._lock:
            return len(self._objects)

+    # === Scene Update Mode ===
+
+    def is_static_mode(self) -> bool:
+        """Check if in STATIC update mode."""
+        return self._update_mode == "static"
+
+    def is_captured(self) -> bool:
+        """Check if scene has been captured (STATIC mode)."""
+        with self._lock:
+            return self._scene_captured
+
+    def reset_capture(self) -> None:
+        """Reset capture flag to allow next detection to update scene."""
+        with self._lock:
+            self._scene_captured = False
+
    # === Spatial Relationships ===

    def set_on_top_of(self, object_id: str, below_id: Optional[str]) -> bool:
@@ -560,6 +582,10 @@ class ObjectsHandler:
        if not raw:
            return

+        # In STATIC mode, ignore updates after scene is captured
+        if self._scene.is_static_mode() and self._scene.is_captured():
+            return
+
        try:
            payload = json.loads(raw)
            objects = payload.get("objects", [])
@@ -569,5 +595,11 @@ class ObjectsHandler:

        self._scene.replace_detected(objects)

+        # Mark scene as captured after first successful update (STATIC mode). NOTE(review): touches SceneState privates directly; consider a public setter.
+        if self._scene.is_static_mode():
+            with self._scene._lock:
+                self._scene._scene_captured = True
+            self._logger.log("Scene captured (STATIC mode)")
+
        # Emit scene update
        self._notifier.send_scene_update()
--- a/dora_voice_control/dora_voice_control/core/state.py
+++ b/dora_voice_control/dora_voice_control/core/state.py
@@ -24,6 +24,7 @@ class VoiceState:
    latest_pose_at: Optional[float] = None
    pending_command: Optional[Dict[str, Any]] = None
    queue: Deque[RobotStep] = field(default_factory=deque)
+    held_object_id: Optional[str] = None  # ID of currently held object


@dataclass
@@ -192,6 +193,21 @@ class SharedState:
        with self._lock:
            return self._debug_state.last_parse_result

+    def get_held_object_id(self) -> Optional[str]:
+        """Get the ID of the currently held object."""
+        with self._lock:
+            return self._voice_state.held_object_id
+
+    def set_held_object_id(self, object_id: Optional[str]) -> None:
+        """Set the ID of the currently held object."""
+        with self._lock:
+            self._voice_state.held_object_id = object_id
+
+    def clear_held_object(self) -> None:
+        """Clear the held object (after releasing)."""
+        with self._lock:
+            self._voice_state.held_object_id = None
+

 def _age_ms(timestamp: Optional[float]) -> Optional[int]:
    """Calculate age in milliseconds from monotonic timestamp."""
--- a/dora_voice_control/dora_voice_control/robots/littlehand/actions.py
+++ b/dora_voice_control/dora_voice_control/robots/littlehand/actions.py
@@ -8,39 +8,39 @@ from ...core.behavior import ActionInfo
 LITTLEHAND_ACTIONS: dict[str, ActionInfo] = {
    "subir": ActionInfo(
        name="subir",
-        aliases=["sube", "arriba"],
+        aliases=[],
        requires_pose=True,
        description="Subir el robot",
    ),
    "bajar": ActionInfo(
        name="bajar",
-        aliases=["baja", "abajo"],
+        aliases=[],
        requires_pose=True,
        description="Bajar el robot",
    ),
    "ir": ActionInfo(
        name="ir",
-        aliases=["ve", "mover", "muevete", "acercar"],
+        aliases=[],
        requires_object=True,
        description="Ir hacia un objeto",
    ),
    "tomar": ActionInfo(
        name="tomar",
-        aliases=["toma", "agarra", "agarrar", "coger", "chupar", "succionar"],
+        aliases=[],
        requires_pose=False,
        requires_object=False,
        description="Tomar un objeto",
    ),
    "soltar": ActionInfo(
        name="soltar",
-        aliases=["deja", "dejar"],
+        aliases=[],
        requires_pose=False,
        requires_object=False,
        description="Soltar el objeto",
    ),
    "reiniciar": ActionInfo(
        name="reiniciar",
-        aliases=["reinicia", "reset"],
+        aliases=[],
        requires_pose=False,
        requires_object=False,
        description="Reiniciar a posicion inicial",
--- a/dora_voice_control/dora_voice_control/robots/littlehand/behavior.py
+++ b/dora_voice_control/dora_voice_control/robots/littlehand/behavior.py
@@ -10,6 +10,7 @@ from .actions import LITTLEHAND_ACTIONS
 from .signature import LittlehandSignature

 _XY_MATCH_RADIUS_MM = float(os.getenv("BAJAR_XY_RADIUS_MM", "40.0"))
+_STACK_CLEARANCE_MM = float(os.getenv("STACK_CLEARANCE_MM", "5.0"))  # Clearance when placing on top of objects

 class LittlehandBehavior(RobotBehavior):
    """Littlehand behavior using the default pick-and-place actions."""
@@ -35,20 +36,92 @@ class LittlehandBehavior(RobotBehavior):
        return self._queue_move(ctx, ctx.pose[0], ctx.pose[1], target_z)

    def action_bajar(self, ctx: ActionContext) -> bool:
-        """Move down by step_mm or to top of object under the tool."""
-        target = self._find_object_under_pose(ctx)
-        if target is not None:
-            target_z = target.position_mm[2] + ctx.config.tcp_offset_mm
-            _log(
-                f"bajar: using object '{target.object_type}' color={target.color} "
-                f"obj_z={target.position_mm[2]:.1f} tcp_offset={ctx.config.tcp_offset_mm:.1f} "
-                f"target_z={target_z:.1f} at pose_z={ctx.pose[2]:.1f}"
-            )
+        """Move down by step_mm or to top of object under the tool.
+
+        If holding an object, accounts for its height when placing on obstacles.
+
+        Note: position_mm[2] from the camera represents the TOP surface of the object
+        (camera looks down, so it sees the top). Only the HELD object's height is added,
+        and it comes from the configured OBJECT_HEIGHT_* values, not the detected height_mm.
+        """
+        obstacle = self._find_object_under_pose(ctx)
+
+        # Get held object height for stack-aware placement
+        held_height = self._get_held_object_height(ctx)
+
+        if obstacle is not None:
+            # obstacle.position_mm[2] is the TOP surface of the obstacle
+            obstacle_top_z = obstacle.position_mm[2]
+
+            if held_height > 0:
+                # Stack-aware: place held object on top of obstacle
+                # When vacuum releases, bottom of held object should sit just above obstacle_top
+                # TCP needs to be at: obstacle_top + held_height + clearance + tcp_offset
+                target_z = obstacle_top_z + held_height + _STACK_CLEARANCE_MM + ctx.config.tcp_offset_mm
+                _log(
+                    f"bajar: STACK-AWARE placement on '{obstacle.object_type}' color={obstacle.color} "
+                    f"obstacle_top_z={obstacle_top_z:.1f} held_height={held_height:.1f} "
+                    f"clearance={_STACK_CLEARANCE_MM:.1f} tcp_offset={ctx.config.tcp_offset_mm:.1f} "
+                    f"target_z={target_z:.1f}"
+                )
+            else:
+                # Not holding anything: move TCP to object top (for grabbing)
+                target_z = obstacle_top_z + ctx.config.tcp_offset_mm
+                _log(
+                    f"bajar: move to object '{obstacle.object_type}' color={obstacle.color} "
+                    f"obstacle_top_z={obstacle_top_z:.1f} tcp_offset={ctx.config.tcp_offset_mm:.1f} "
+                    f"target_z={target_z:.1f}"
+                )
            return self._queue_move(ctx, ctx.pose[0], ctx.pose[1], target_z)
+
        target_z = ctx.pose[2] - self.config.step_mm
        _log(f"bajar: no object under pose, step to z={target_z:.1f}")
        return self._queue_move(ctx, ctx.pose[0], ctx.pose[1], target_z)

+    def _get_held_object_height(self, ctx: ActionContext) -> float:
+        """Get the height of the currently held object.
+
+        Returns 0.0 when nothing is held or the held object is missing from the scene;
+        otherwise the height comes from _get_configured_height (OBJECT_HEIGHT_* env vars).
+        """
+        held_id = ctx.shared_state.get_held_object_id()
+        if not held_id:
+            return 0.0
+
+        held_obj = ctx.scene.get(held_id)
+        if not held_obj:
+            _log(f"bajar: held object id={held_id} not found in scene")
+            return 0.0
+
+        # Use configured height based on object type and size
+        height = self._get_configured_height(held_obj.object_type, held_obj.size)
+        _log(f"bajar: holding object id={held_id} type={held_obj.object_type} size={held_obj.size} height={height:.1f}mm")
+        return height
+
+    def _get_configured_height(self, object_type: str, size: str) -> float:
+        """Get configured height for object type/size from environment or defaults."""
+        # Try specific key like OBJECT_HEIGHT_CUBE_BIG
+        key = f"OBJECT_HEIGHT_{object_type.upper()}_{size.upper()}"
+        height_str = os.getenv(key)
+        if height_str:
+            try:
+                return float(height_str)
+            except ValueError:
+                pass
+
+        # Try generic key like OBJECT_HEIGHT_CUBE
+        key = f"OBJECT_HEIGHT_{object_type.upper()}"
+        height_str = os.getenv(key)
+        if height_str:
+            try:
+                return float(height_str)
+            except ValueError:
+                pass
+
+        # Default height (configurable via OBJECT_HEIGHT_DEFAULT)
+        default = float(os.getenv("OBJECT_HEIGHT_DEFAULT", "40.0"))
+        return default
+
    def action_ir(self, ctx: ActionContext) -> bool:
        """Move to object X/Y while keeping current Z."""
        if ctx.pose is None or ctx.target is None:
@@ -57,24 +130,45 @@ class LittlehandBehavior(RobotBehavior):
        return self._queue_move(ctx, pos[0], pos[1], ctx.pose[2])

    def action_tomar(self, ctx: ActionContext) -> bool:
-        """Activate tool (low-level grab)."""
+        """Activate tool (low-level grab) and track held object."""
        self._queue_steps(ctx, self.robot_adapter.grab())
+
+        # Find the object under current pose to track for stack-aware placement
+        # (ctx.target may be None since requires_object=False in littlehand)
+        target_obj = ctx.target or self._find_object_under_pose(ctx)
+        if target_obj is not None:
+            ctx.shared_state.set_held_object_id(target_obj.id)
+            height = self._get_configured_height(target_obj.object_type, target_obj.size)
+            _log(f"tomar: now holding object id={target_obj.id} type={target_obj.object_type} height={height:.1f}mm")
+        else:
+            _log("tomar: no object found under pose, not tracking held object")
        return True

    def action_soltar(self, ctx: ActionContext) -> bool:
-        """Deactivate tool (low-level release)."""
+        """Deactivate tool (low-level release) and clear held object."""
        self._queue_steps(ctx, self.robot_adapter.release())
+        # Clear the held object tracking
+        held_id = ctx.shared_state.get_held_object_id()
+        if held_id:
+            _log(f"soltar: released object id={held_id}")
+        ctx.shared_state.clear_held_object()
        return True

    def action_reiniciar(self, ctx: ActionContext) -> bool:
-        """Reset: release tool, move home, clear objects."""
+        """Reset: release tool, move home, clear objects and held state."""
        self._queue_steps(ctx, self.robot_adapter.reset_tool())
        self._queue_steps(ctx, self.robot_adapter.move(ctx.home_pose))
        ctx.scene.clear_detected()
+        ctx.shared_state.clear_held_object()
+        _log("reiniciar: cleared held object state")
        return True

    def _find_object_under_pose(self, ctx: ActionContext) -> Optional["SceneObject"]:
-        """Find the topmost object near the current pose x,y (mm)."""
+        """Find the topmost object near the current pose x,y (mm).
+
+        Note: position_mm[2] is treated as the TOP surface of the object
+        (camera looks down, sees the top surface).
+        """
        if ctx.pose is None:
            _log("bajar: missing pose, cannot find object under tool")
            return None
@@ -89,20 +183,19 @@ class LittlehandBehavior(RobotBehavior):
            dist2 = dx * dx + dy * dy
            if dist2 > _XY_MATCH_RADIUS_MM * _XY_MATCH_RADIUS_MM:
                continue
-            top_surface = obj.position_mm[2] + obj.height_mm
-            candidates.append((top_surface, obj))
+            # position_mm[2] IS the top surface (camera sees top of object)
+            top_surface_z = obj.position_mm[2]
+            candidates.append((top_surface_z, obj))
            _log(
                "bajar: near id={} type={} color={} center=({:.1f},{:.1f}) "
-                "dist_xy={:.1f} obj_z={:.1f} height={:.1f} top_z={:.1f}".format(
+                "dist_xy={:.1f} top_z={:.1f}".format(
                    obj.id,
                    obj.object_type,
                    obj.color,
                    obj.position_mm[0],
                    obj.position_mm[1],
                    (dist2 ** 0.5),
-                    obj.position_mm[2],
-                    obj.height_mm,
-                    top_surface,
+                    top_surface_z,
                )
            )
        if not candidates:
--- a/dora_voice_control/dora_voice_control/robots/littlehand/signature.py
+++ b/dora_voice_control/dora_voice_control/robots/littlehand/signature.py
@@ -14,7 +14,11 @@ if dspy is not None:

        comando = dspy.InputField(desc="Voice command in Spanish")
        accion = dspy.OutputField(
-            desc="Action name: subir, bajar, ir, tomar, soltar, reiniciar or error"
+            desc=(
+                "Accion: subir, bajar, ir, tomar, soltar, reiniciar o error. "
+                "Mapea errores infantiles, parafrasis y sinonimos cercanos a la accion valida mas cercana. "
+                "Si la intencion es ambigua o no relacionada, devuelve error."
+            )
        )
        objeto = dspy.OutputField(
            desc="Object name (cubo, cilindro, estrella, caja) or 'no especificado'"