Work putting one object over the other
This commit is contained in:
@@ -129,8 +129,9 @@ class RobotBehavior(ABC):
|
||||
|
||||
def _init_dspy(self, llm_config: LLMConfig) -> None:
|
||||
"""Initialize DSPy predictor for this behavior."""
|
||||
_log(f"Initializing DSPy with provider={llm_config.provider}, model={llm_config.model}")
|
||||
if not DSPY_AVAILABLE:
|
||||
_log("DSPy not available, falling back to rules")
|
||||
_log("DSPy not available (import failed), falling back to rules")
|
||||
return
|
||||
if self.CommandSignature is None:
|
||||
_log("No CommandSignature defined, falling back to rules")
|
||||
@@ -141,8 +142,12 @@ class RobotBehavior(ABC):
|
||||
dspy.configure(lm=lm)
|
||||
self._predictor = dspy.Predict(self.CommandSignature)
|
||||
_log(f"DSPy initialized with {llm_config.provider}/{llm_config.model}")
|
||||
else:
|
||||
_log(f"Failed to create LM for provider={llm_config.provider}")
|
||||
except Exception as e:
|
||||
import traceback
|
||||
_log(f"Failed to initialize DSPy: {e}")
|
||||
_log(f"Traceback: {traceback.format_exc()}")
|
||||
|
||||
def _create_lm(self, config: LLMConfig) -> Optional[Any]:
|
||||
"""Create DSPy language model."""
|
||||
@@ -171,8 +176,10 @@ class RobotBehavior(ABC):
|
||||
def parse_command(self, transcript: str) -> Dict[str, str]:
|
||||
"""Parse voice command using DSPy or fallback to rules."""
|
||||
if self._predictor:
|
||||
_log(f"Using DSPy to parse: '{transcript}'")
|
||||
try:
|
||||
result = self._predictor(comando=transcript)
|
||||
_log(f"DSPy result: accion={result.accion}, objeto={result.objeto}, color={result.color}, tamano={result.tamano}")
|
||||
return {
|
||||
"resultado": "ok" if result.accion != "error" else "error",
|
||||
"accion": result.accion,
|
||||
@@ -181,7 +188,11 @@ class RobotBehavior(ABC):
|
||||
"tamano": result.tamano,
|
||||
}
|
||||
except Exception as e:
|
||||
import traceback
|
||||
_log(f"DSPy parsing failed: {e}, falling back to rules")
|
||||
_log(f"Traceback: {traceback.format_exc()}")
|
||||
else:
|
||||
_log(f"No DSPy predictor, using rules to parse: '{transcript}'")
|
||||
return self.rule_parse(transcript)
|
||||
|
||||
def rule_parse(self, transcript: str) -> Dict[str, str]:
|
||||
|
||||
@@ -73,6 +73,10 @@ class SceneState:
|
||||
def __init__(self) -> None:
    """Create an empty scene and read the update mode from the environment.

    The update mode comes from the ``SCENE_UPDATE_MODE`` environment
    variable. The value is stripped of surrounding whitespace and
    lower-cased so that values such as ``"Static "`` still select static
    mode; an unset, empty or whitespace-only value selects ``"static"``.
    """
    self._lock = threading.Lock()
    self._objects: Dict[str, SceneObject] = {}
    # Scene update mode: "static" (default) or "dynamic".
    # Normalise before storing: is_static_mode() does an exact comparison,
    # so stray whitespace would otherwise silently disable static mode.
    raw_mode = os.getenv("SCENE_UPDATE_MODE", "")
    self._update_mode: str = raw_mode.strip().lower() or "static"
    # Whether scene has been captured (STATIC mode only)
    self._scene_captured: bool = False
|
||||
|
||||
# === Core Operations ===
|
||||
|
||||
@@ -146,6 +150,8 @@ class SceneState:
|
||||
for obj in self._objects.values():
|
||||
if obj.on_top_of and obj.on_top_of not in self._objects:
|
||||
obj.on_top_of = None
|
||||
# Reset capture flag to allow next detection
|
||||
self._scene_captured = False
|
||||
|
||||
# === Query ===
|
||||
|
||||
@@ -176,6 +182,22 @@ class SceneState:
|
||||
with self._lock:
|
||||
return len(self._objects)
|
||||
|
||||
# === Scene Update Mode ===
|
||||
|
||||
def is_static_mode(self) -> bool:
    """Return True when the stored update mode is exactly "static"."""
    mode = self._update_mode
    return mode == "static"
|
||||
|
||||
def is_captured(self) -> bool:
    """Return the scene-captured flag, read while holding the scene lock."""
    with self._lock:
        captured = self._scene_captured
    return captured
|
||||
|
||||
def reset_capture(self) -> None:
    """Clear the scene-captured flag while holding the scene lock.

    After this call ``is_captured()`` reports False again.
    """
    lock = self._lock
    with lock:
        self._scene_captured = False
|
||||
|
||||
# === Spatial Relationships ===
|
||||
|
||||
def set_on_top_of(self, object_id: str, below_id: Optional[str]) -> bool:
|
||||
@@ -560,6 +582,10 @@ class ObjectsHandler:
|
||||
if not raw:
|
||||
return
|
||||
|
||||
# In STATIC mode, ignore updates after scene is captured
|
||||
if self._scene.is_static_mode() and self._scene.is_captured():
|
||||
return
|
||||
|
||||
try:
|
||||
payload = json.loads(raw)
|
||||
objects = payload.get("objects", [])
|
||||
@@ -569,5 +595,11 @@ class ObjectsHandler:
|
||||
|
||||
self._scene.replace_detected(objects)
|
||||
|
||||
# Mark scene as captured after first successful update (STATIC mode)
|
||||
if self._scene.is_static_mode():
|
||||
with self._scene._lock:
|
||||
self._scene._scene_captured = True
|
||||
self._logger.log("Scene captured (STATIC mode)")
|
||||
|
||||
# Emit scene update
|
||||
self._notifier.send_scene_update()
|
||||
|
||||
@@ -24,6 +24,7 @@ class VoiceState:
|
||||
latest_pose_at: Optional[float] = None
|
||||
pending_command: Optional[Dict[str, Any]] = None
|
||||
queue: Deque[RobotStep] = field(default_factory=deque)
|
||||
held_object_id: Optional[str] = None # ID of currently held object
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -192,6 +193,21 @@ class SharedState:
|
||||
with self._lock:
|
||||
return self._debug_state.last_parse_result
|
||||
|
||||
def get_held_object_id(self) -> Optional[str]:
    """Return the stored held-object id (None when nothing is recorded).

    The value is read from the voice state while holding the state lock.
    """
    with self._lock:
        current = self._voice_state.held_object_id
    return current
|
||||
|
||||
def set_held_object_id(self, object_id: Optional[str]) -> None:
    """Store ``object_id`` as the held-object id on the voice state.

    The write happens while holding the state lock. Passing None stores
    None, which is the same state ``clear_held_object()`` leaves behind.
    """
    voice = self._voice_state
    with self._lock:
        voice.held_object_id = object_id
|
||||
|
||||
def clear_held_object(self) -> None:
    """Reset the held-object id on the voice state to None.

    The write happens while holding the state lock.
    """
    voice = self._voice_state
    with self._lock:
        voice.held_object_id = None
|
||||
|
||||
|
||||
def _age_ms(timestamp: Optional[float]) -> Optional[int]:
|
||||
"""Calculate age in milliseconds from monotonic timestamp."""
|
||||
|
||||
@@ -8,39 +8,39 @@ from ...core.behavior import ActionInfo
|
||||
LITTLEHAND_ACTIONS: dict[str, ActionInfo] = {
|
||||
"subir": ActionInfo(
|
||||
name="subir",
|
||||
aliases=["sube", "arriba"],
|
||||
aliases=[],
|
||||
requires_pose=True,
|
||||
description="Subir el robot",
|
||||
),
|
||||
"bajar": ActionInfo(
|
||||
name="bajar",
|
||||
aliases=["baja", "abajo"],
|
||||
aliases=[],
|
||||
requires_pose=True,
|
||||
description="Bajar el robot",
|
||||
),
|
||||
"ir": ActionInfo(
|
||||
name="ir",
|
||||
aliases=["ve", "mover", "muevete", "acercar"],
|
||||
aliases=[],
|
||||
requires_object=True,
|
||||
description="Ir hacia un objeto",
|
||||
),
|
||||
"tomar": ActionInfo(
|
||||
name="tomar",
|
||||
aliases=["toma", "agarra", "agarrar", "coger", "chupar", "succionar"],
|
||||
aliases=[],
|
||||
requires_pose=False,
|
||||
requires_object=False,
|
||||
description="Tomar un objeto",
|
||||
),
|
||||
"soltar": ActionInfo(
|
||||
name="soltar",
|
||||
aliases=["deja", "dejar"],
|
||||
aliases=[],
|
||||
requires_pose=False,
|
||||
requires_object=False,
|
||||
description="Soltar el objeto",
|
||||
),
|
||||
"reiniciar": ActionInfo(
|
||||
name="reiniciar",
|
||||
aliases=["reinicia", "reset"],
|
||||
aliases=[],
|
||||
requires_pose=False,
|
||||
requires_object=False,
|
||||
description="Reiniciar a posicion inicial",
|
||||
|
||||
@@ -10,6 +10,7 @@ from .actions import LITTLEHAND_ACTIONS
|
||||
from .signature import LittlehandSignature
|
||||
|
||||
# XY match radius in mm, read once at import from BAJAR_XY_RADIUS_MM (default 40.0).
# Objects whose squared XY distance from the tool pose exceeds this radius squared
# are skipped when searching for the object under the tool.
_XY_MATCH_RADIUS_MM = float(os.getenv("BAJAR_XY_RADIUS_MM", "40.0"))
|
||||
_STACK_CLEARANCE_MM = float(os.getenv("STACK_CLEARANCE_MM", "5.0")) # Clearance when placing on top of objects
|
||||
|
||||
class LittlehandBehavior(RobotBehavior):
|
||||
"""Littlehand behavior using the default pick-and-place actions."""
|
||||
@@ -35,20 +36,92 @@ class LittlehandBehavior(RobotBehavior):
|
||||
return self._queue_move(ctx, ctx.pose[0], ctx.pose[1], target_z)
|
||||
|
||||
def action_bajar(self, ctx: ActionContext) -> bool:
    """Move down by step_mm or to the top of the object under the tool.

    If holding an object, accounts for its height when placing on obstacles.

    Note: position_mm[2] from the camera represents the TOP surface of the
    object (camera looks down, so it sees the top). The height of the HELD
    object (from ``_get_held_object_height``) is only used to compute the
    placement position.

    Args:
        ctx: Action context; this method reads ``ctx.pose`` (x, y, z) and
            ``ctx.config.tcp_offset_mm``.

    Returns:
        False when ``ctx.pose`` is None (nothing is queued); otherwise the
        result of ``self._queue_move`` for the computed target.
    """
    # Every branch below indexes ctx.pose; without this guard a missing pose
    # surfaces as a TypeError in the step-down fallback.
    if ctx.pose is None:
        _log("bajar: missing pose, cannot move down")
        return False

    obstacle = self._find_object_under_pose(ctx)

    # Get held object height for stack-aware placement
    held_height = self._get_held_object_height(ctx)

    if obstacle is not None:
        # obstacle.position_mm[2] is the TOP surface of the obstacle
        obstacle_top_z = obstacle.position_mm[2]

        if held_height > 0:
            # Stack-aware: place held object on top of obstacle.
            # TCP target is:
            #   obstacle_top + held_height + stack clearance + tcp_offset
            target_z = obstacle_top_z + held_height + _STACK_CLEARANCE_MM + ctx.config.tcp_offset_mm
            _log(
                f"bajar: STACK-AWARE placement on '{obstacle.object_type}' color={obstacle.color} "
                f"obstacle_top_z={obstacle_top_z:.1f} held_height={held_height:.1f} "
                f"clearance={_STACK_CLEARANCE_MM:.1f} tcp_offset={ctx.config.tcp_offset_mm:.1f} "
                f"target_z={target_z:.1f}"
            )
        else:
            # Not holding anything: move TCP to object top (for grabbing)
            target_z = obstacle_top_z + ctx.config.tcp_offset_mm
            _log(
                f"bajar: move to object '{obstacle.object_type}' color={obstacle.color} "
                f"obstacle_top_z={obstacle_top_z:.1f} tcp_offset={ctx.config.tcp_offset_mm:.1f} "
                f"target_z={target_z:.1f}"
            )
        return self._queue_move(ctx, ctx.pose[0], ctx.pose[1], target_z)

    # Nothing under the tool: plain relative step down.
    target_z = ctx.pose[2] - self.config.step_mm
    _log(f"bajar: no object under pose, step to z={target_z:.1f}")
    return self._queue_move(ctx, ctx.pose[0], ctx.pose[1], target_z)
|
||||
|
||||
def _get_held_object_height(self, ctx: ActionContext) -> float:
    """Return the configured height (mm) of the currently held object.

    The held object id is read from ``ctx.shared_state`` and looked up in
    ``ctx.scene``; its height is then resolved by
    ``_get_configured_height`` from the object's type and size.

    Returns:
        0.0 when no object id is recorded as held or the id is not found
        in the scene; otherwise the configured height.
    """
    held_id = ctx.shared_state.get_held_object_id()
    if held_id:
        held_obj = ctx.scene.get(held_id)
        if held_obj:
            # Height is resolved from object type and size, not from detection.
            height = self._get_configured_height(held_obj.object_type, held_obj.size)
            _log(f"bajar: holding object id={held_id} type={held_obj.object_type} size={held_obj.size} height={height:.1f}mm")
            return height
        _log(f"bajar: held object id={held_id} not found in scene")
    return 0.0
|
||||
|
||||
def _get_configured_height(self, object_type: str, size: str) -> float:
    """Resolve the height (mm) for an object type/size from the environment.

    Environment variables are consulted in order of decreasing specificity:

    1. ``OBJECT_HEIGHT_<TYPE>_<SIZE>`` (e.g. ``OBJECT_HEIGHT_CUBE_BIG``)
    2. ``OBJECT_HEIGHT_<TYPE>`` (e.g. ``OBJECT_HEIGHT_CUBE``)
    3. ``OBJECT_HEIGHT_DEFAULT``

    A variable that is unset, empty, not parseable as a float, or not a
    finite number is ignored and the next candidate is tried. If none
    applies, the built-in default of 40.0 is returned.

    Args:
        object_type: Object type name; upper-cased to build the key.
        size: Object size name; upper-cased to build the specific key.

    Returns:
        The first usable configured height, or 40.0.
    """
    import math  # local import: only needed for the finiteness check

    type_key = f"OBJECT_HEIGHT_{object_type.upper()}"
    candidates = (
        f"{type_key}_{size.upper()}",
        type_key,
        "OBJECT_HEIGHT_DEFAULT",
    )
    for key in candidates:
        raw = os.getenv(key)
        if not raw:
            continue
        try:
            value = float(raw)
        except ValueError:
            # Malformed value: fall through to the next, less specific key.
            continue
        # float() accepts "nan"/"inf"; those must never reach a move target.
        if math.isfinite(value):
            return value
    return 40.0
|
||||
|
||||
def action_ir(self, ctx: ActionContext) -> bool:
|
||||
"""Move to object X/Y while keeping current Z."""
|
||||
if ctx.pose is None or ctx.target is None:
|
||||
@@ -57,24 +130,45 @@ class LittlehandBehavior(RobotBehavior):
|
||||
return self._queue_move(ctx, pos[0], pos[1], ctx.pose[2])
|
||||
|
||||
def action_tomar(self, ctx: ActionContext) -> bool:
    """Activate the tool (low-level grab) and record which object is held.

    The adapter's grab steps are queued first. The held object is taken
    from ``ctx.target`` when set, otherwise from the object found under the
    current pose; its id is stored in shared state so a later ``bajar`` can
    do stack-aware placement. If no object is found, nothing is recorded.

    Returns:
        Always True.
    """
    self._queue_steps(ctx, self.robot_adapter.grab())

    # ctx.target may be None since requires_object=False in littlehand,
    # so fall back to whatever sits under the current pose.
    target_obj = ctx.target or self._find_object_under_pose(ctx)
    if target_obj is None:
        _log("tomar: no object found under pose, not tracking held object")
        return True

    ctx.shared_state.set_held_object_id(target_obj.id)
    height = self._get_configured_height(target_obj.object_type, target_obj.size)
    _log(f"tomar: now holding object id={target_obj.id} type={target_obj.object_type} height={height:.1f}mm")
    return True
|
||||
|
||||
def action_soltar(self, ctx: ActionContext) -> bool:
    """Deactivate the tool (low-level release) and forget the held object.

    The adapter's release steps are queued, the previously held id (if any)
    is logged, and the held-object id in shared state is cleared
    unconditionally.

    Returns:
        Always True.
    """
    self._queue_steps(ctx, self.robot_adapter.release())

    shared = ctx.shared_state
    held_id = shared.get_held_object_id()
    if held_id:
        _log(f"soltar: released object id={held_id}")
    # Clear even when nothing was recorded as held.
    shared.clear_held_object()
    return True
|
||||
|
||||
def action_reiniciar(self, ctx: ActionContext) -> bool:
    """Reset: release tool, move home, clear objects and held state.

    Queues the adapter's tool-reset steps followed by a move to
    ``ctx.home_pose``, clears the detected objects from the scene, and
    clears the held-object id in shared state.

    Returns:
        Always True.
    """
    adapter = self.robot_adapter
    self._queue_steps(ctx, adapter.reset_tool())
    self._queue_steps(ctx, adapter.move(ctx.home_pose))

    ctx.scene.clear_detected()
    ctx.shared_state.clear_held_object()
    _log("reiniciar: cleared held object state")
    return True
|
||||
|
||||
def _find_object_under_pose(self, ctx: ActionContext) -> Optional["SceneObject"]:
|
||||
"""Find the topmost object near the current pose x,y (mm)."""
|
||||
"""Find the topmost object near the current pose x,y (mm).
|
||||
|
||||
Note: position_mm[2] is treated as the TOP surface of the object
|
||||
(camera looks down, sees the top surface).
|
||||
"""
|
||||
if ctx.pose is None:
|
||||
_log("bajar: missing pose, cannot find object under tool")
|
||||
return None
|
||||
@@ -89,20 +183,19 @@ class LittlehandBehavior(RobotBehavior):
|
||||
dist2 = dx * dx + dy * dy
|
||||
if dist2 > _XY_MATCH_RADIUS_MM * _XY_MATCH_RADIUS_MM:
|
||||
continue
|
||||
top_surface = obj.position_mm[2] + obj.height_mm
|
||||
candidates.append((top_surface, obj))
|
||||
# position_mm[2] IS the top surface (camera sees top of object)
|
||||
top_surface_z = obj.position_mm[2]
|
||||
candidates.append((top_surface_z, obj))
|
||||
_log(
|
||||
"bajar: near id={} type={} color={} center=({:.1f},{:.1f}) "
|
||||
"dist_xy={:.1f} obj_z={:.1f} height={:.1f} top_z={:.1f}".format(
|
||||
"dist_xy={:.1f} top_z={:.1f}".format(
|
||||
obj.id,
|
||||
obj.object_type,
|
||||
obj.color,
|
||||
obj.position_mm[0],
|
||||
obj.position_mm[1],
|
||||
(dist2 ** 0.5),
|
||||
obj.position_mm[2],
|
||||
obj.height_mm,
|
||||
top_surface,
|
||||
top_surface_z,
|
||||
)
|
||||
)
|
||||
if not candidates:
|
||||
|
||||
@@ -14,7 +14,11 @@ if dspy is not None:
|
||||
|
||||
comando = dspy.InputField(desc="Voice command in Spanish")
|
||||
accion = dspy.OutputField(
|
||||
desc="Action name: subir, bajar, ir, tomar, soltar, reiniciar or error"
|
||||
desc=(
|
||||
"Accion: subir, bajar, ir, tomar, soltar, reiniciar o error. "
|
||||
"Mapea errores infantiles, parafrasis y sinonimos cercanos a la accion valida mas cercana. "
|
||||
"Si la intencion es ambigua o no relacionada, devuelve error."
|
||||
)
|
||||
)
|
||||
objeto = dspy.OutputField(
|
||||
desc="Object name (cubo, cilindro, estrella, caja) or 'no especificado'"
|
||||
|
||||
Reference in New Issue
Block a user