fix: up

2025-08-21 12:51:13 +08:00 · 2025-08-21 12:51:13 +08:00 · e0006cb2f3
commit e0006cb2f3
parent 253f5eab81
10 changed files with 315 additions and 55 deletions
--- a/main.py
+++ b/main.py
@ -75,7 +75,7 @@ def main():
            go_until_yellow_area(Ctrl, msg, yellow_ratio_threshold=0.05)
        elif TASK == TaskType.RED_BAR:
            from task_4.task_4 import go_straight_until_red_bar
-            go_straight_until_red_bar(Ctrl, msg)
+            go_straight_until_red_bar(Ctrl, msg, red_ratio_threshold=0.18)
        elif TASK == TaskType.UP_AND_DOWN:
            from task_3.task_3 import go_straight_with_enhanced_calibration
            go_straight_with_enhanced_calibration(Ctrl, msg, distance = 5, speed=0.5, observe=False, mode=11, gait_id=3, step_height=[0.21, 0.21])
--- a/task_3/task_3.py
+++ b/task_3/task_3.py
@ -27,7 +27,7 @@ from utils.speech_demo import speak
 logger = get_logger("任务3")

 observe = False
-YELLOW_RATIO_THRESHOLD = 0.03  # TODO 黄色区域比例阈值
+YELLOW_RATIO_THRESHOLD = 0.04  # TODO 黄色区域比例阈值

 def run_task_3(ctrl, msg, time_sleep=5000):
    section('任务3：上下坡', "启动")
--- a/test/text-image/README.md
+++ b/test/text-image/README.md
@ -0,0 +1,72 @@
+# 文字识别程序（改进版）
+
+这是一个使用OpenCV识别图片中文字的Python程序，采用先识别白色区域再识别文字的两步法策略。
+
+## 功能特点
+
+- **两步识别策略**：先检测白色区域，再在白色区域内识别文字
+- 使用HSV色彩空间精确分离白色区域
+- 基于轮廓检测和特征分析的文字分类
+- 能够识别字母A及其右侧的数字1、2
+- 自动保存识别结果图片，包含区域标注和字符识别
+
+## 安装依赖
+
+```bash
+pip install -r requirements.txt
+```
+
+## 使用方法
+
+1. 将需要识别的图片命名为`a1.jpg`并放在`imgs/`文件夹中（程序目前只读取`imgs/a1.jpg`）
+2. 运行程序：
+   ```bash
+   python text_image.py
+   ```
+
+## 程序流程
+
+### 第一步：白色区域检测
+1. 转换到HSV色彩空间
+2. 使用颜色阈值分离白色区域
+3. 形态学操作连接相近区域
+4. 轮廓检测和筛选
+
+### 第二步：文字识别
+1. 在白色区域内提取ROI
+2. 二值化处理突出文字
+3. 查找文字轮廓
+4. 特征提取和分类
+
+## 识别原理
+
+- **数字1**：垂直投影方差小（<8），中心密度低（<0.4），整体密度低于0.5
+- **数字2**：垂直投影方差大（>12），中心密度高（>0.3），整体密度高于0.4
+- **字母A**：文字区域的宽度大于高度的1.5倍时判定为A（仅取每个白色区域中从左到右的第一个）
+
+## 输出说明
+
+- **蓝色边框**：标注检测到的白色区域
+- **绿色边框**：标注成功识别的字符
+- **红色边框**：标注无法识别的字符
+- **文字标签**：显示识别结果（A、1、2）
+
+## 输出文件
+
+程序会在`imgs/`文件夹中生成：
+- `result_a_digit.jpg`：`imgs/a1.jpg` 的识别结果（包含白色区域和字符标注）
+
+## 优势
+
+- 避免了复杂背景的干扰
+- 提高了文字识别的准确性
+- 清晰的区域划分和结果展示
+- 更好的噪声过滤
+
+## 注意事项
+
+- 确保图片中有明显的白色区域
+- 白色区域内的文字对比度要足够
+- 程序会自动过滤太小的区域和轮廓
+- 识别结果包含区域ID和字符ID信息
--- a/test/text-image/imgs/1.jpg
+++ b/test/text-image/imgs/1.jpg
--- a/test/text-image/imgs/2.jpg
+++ b/test/text-image/imgs/2.jpg
--- a/test/text-image/imgs/a1.jpg
+++ b/test/text-image/imgs/a1.jpg
--- a/test/text-image/imgs/result_1_improved.jpg
+++ b/test/text-image/imgs/result_1_improved.jpg
--- a/test/text-image/requirements.txt
+++ b/test/text-image/requirements.txt
@ -0,0 +1,3 @@
+opencv-python>=4.5.0
+numpy>=1.19.0
+# pathlib is part of the Python standard library (3.4+); it does not need to be installed with pip
--- a/test/text-image/text_image.py
+++ b/test/text-image/text_image.py
@ -0,0 +1,238 @@
+import cv2
+import numpy as np
+import os
+from pathlib import Path
+
+def detect_white_paper(image):
+    """
+    检测图像中的白色纸张区域
+    """
+    # 转换为HSV色彩空间
+    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+    
+    # 更严格的白色检测阈值
+    lower_white = np.array([0, 0, 230])  # 高亮度，低饱和度
+    upper_white = np.array([180, 20, 255])
+    
+    # 创建白色掩码
+    white_mask = cv2.inRange(hsv, lower_white, upper_white)
+    
+    # 形态学操作：去除噪点
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    white_mask = cv2.morphologyEx(white_mask, cv2.MORPH_OPEN, kernel)
+    
+    # 查找轮廓
+    contours, _ = cv2.findContours(white_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    
+    # 筛选合适的白色区域（基于面积和形状）
+    paper_regions = []
+    for contour in contours:
+        area = cv2.contourArea(contour)
+        if area > 5000:  # 过滤太小的区域
+            x, y, w, h = cv2.boundingRect(contour)
+            aspect_ratio = w / h
+            # 纸张通常是横向的
+            if 1.0 < aspect_ratio < 3.0:
+                paper_regions.append((x, y, w, h))
+    
+    return paper_regions, white_mask
+
+def find_text_regions(roi):
+    """
+    在ROI中查找文字区域
+    """
+    # 转换为灰度图
+    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+    
+    # 自适应二值化，更好地处理不同光照条件
+    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
+                                  cv2.THRESH_BINARY_INV, 11, 2)
+    
+    # 形态学操作：连接文字笔画
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
+    
+    # 查找轮廓
+    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    
+    # 筛选文字轮廓
+    text_regions = []
+    for contour in contours:
+        area = cv2.contourArea(contour)
+        if 200 < area < 10000:  # 合理的文字面积范围
+            x, y, w, h = cv2.boundingRect(contour)
+            aspect_ratio = w / h
+            # 文字通常有合理的宽高比
+            if 0.3 < aspect_ratio < 4.0:
+                text_regions.append((x, y, w, h))
+    
+    return text_regions, binary
+
+def classify_digit(roi, region):
+    """
+    分类数字1或2
+    """
+    x, y, w, h = region
+    
+    # 提取字符ROI
+    char_roi = roi[y:y+h, x:x+w]
+    
+    # 转换为灰度并二值化
+    gray_char = cv2.cvtColor(char_roi, cv2.COLOR_BGR2GRAY)
+    _, binary_char = cv2.threshold(gray_char, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+    
+    # 调整大小到统一尺寸
+    char_resized = cv2.resize(binary_char, (20, 20))
+    
+    # 计算特征
+    total_pixels = char_resized.shape[0] * char_resized.shape[1]
+    black_pixels = np.sum(char_resized == 255)
+    density = black_pixels / total_pixels
+    
+    # 计算水平和垂直投影
+    horizontal_proj = np.sum(char_resized == 255, axis=1)
+    vertical_proj = np.sum(char_resized == 255, axis=0)
+    
+    # 计算投影的方差
+    h_variance = np.var(horizontal_proj)
+    v_variance = np.var(vertical_proj)
+    
+    # 计算中心区域密度
+    center_region = char_resized[6:14, 6:14]
+    center_density = np.sum(center_region == 255) / (8 * 8)
+    
+    # 基于特征分类
+    if density < 0.2:
+        return "unknown"
+    
+    # 数字1的特征：垂直投影方差小，中心密度低
+    if v_variance < 8 and center_density < 0.4 and density < 0.5:
+        return "1"
+    
+    # 数字2的特征：垂直投影方差大，中心密度高
+    if v_variance > 12 and center_density > 0.3 and density > 0.4:
+        return "2"
+    
+    return "unknown"
+
+def find_a_and_digit(image_path):
+    """
+    查找A和右侧的数字
+    """
+    # 读取图像
+    image = cv2.imread(image_path)
+    if image is None:
+        print(f"无法读取图像: {image_path}")
+        return None, []
+    
+    # 检测白色纸张区域
+    paper_regions, white_mask = detect_white_paper(image)
+    
+    if not paper_regions:
+        print("未检测到白色纸张区域")
+        return image, []
+    
+    print(f"检测到 {len(paper_regions)} 个白色纸张区域")
+    
+    all_results = []
+    
+    # 处理每个纸张区域
+    for i, paper_region in enumerate(paper_regions):
+        x, y, w, h = paper_region
+        
+        print(f"处理纸张区域 {i+1}: 位置({x}, {y}), 大小({w}x{h})")
+        
+        # 提取纸张ROI
+        paper_roi = image[y:y+h, x:x+w]
+        
+        # 查找文字区域
+        text_regions, binary = find_text_regions(paper_roi)
+        
+        print(f"  在区域 {i+1} 中找到 {len(text_regions)} 个文字区域")
+        
+        # 按x坐标排序文字区域（从左到右）
+        text_regions.sort(key=lambda r: r[0])
+        
+        # 查找A和右侧的数字
+        a_found = False
+        digit_found = False
+        
+        for j, text_region in enumerate(text_regions):
+            tx, ty, tw, th = text_region
+            
+            # 简单判断：如果区域较宽，可能是A；如果较窄，可能是数字
+            if tw > th * 1.5 and not a_found:  # 可能是A
+                a_found = True
+                result = {
+                    'region_id': i + 1,
+                    'char_id': j + 1,
+                    'char_type': 'A',
+                    'position': (x + tx, y + ty, tw, th),
+                    'relative_position': (tx, ty, tw, th)
+                }
+                all_results.append(result)
+                print(f"    找到A: 位置({tx}, {ty}), 大小({tw}x{th})")
+                
+            elif tw <= th * 1.5 and a_found and not digit_found:  # 可能是数字
+                # 分类数字
+                digit_type = classify_digit(paper_roi, text_region)
+                if digit_type in ['1', '2']:
+                    digit_found = True
+                    result = {
+                        'region_id': i + 1,
+                        'char_id': j + 1,
+                        'char_type': digit_type,
+                        'position': (x + tx, y + ty, tw, th),
+                        'relative_position': (tx, ty, tw, th)
+                    }
+                    all_results.append(result)
+                    print(f"    找到数字{digit_type}: 位置({tx}, {ty}), 大小({tw}x{th})")
+                    break
+        
+        # 在纸张周围绘制边框
+        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 3)
+        cv2.putText(image, f"Paper {i+1}", (x, y - 10), 
+                   cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
+    
+    # 在识别结果周围绘制边框
+    for result in all_results:
+        rx, ry, rw, rh = result['position']
+        color = (0, 255, 0) if result['char_type'] != "unknown" else (0, 0, 255)
+        cv2.rectangle(image, (rx, ry), (rx + rw, ry + rh), color, 2)
+        cv2.putText(image, result['char_type'], (rx, ry - 5), 
+                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+    
+    return image, all_results
+
+def main():
+    """
+    主函数：测试A和数字识别
+    """
+    # 图片路径
+    img_dir = Path("imgs")
+    img_path = img_dir / "a1.jpg"
+    
+    print("开始识别图片中的A和右侧数字...")
+    
+    # 识别图片
+    if img_path.exists():
+        print(f"\n识别图片: {img_path}")
+        result_img, results = find_a_and_digit(str(img_path))
+        
+        if results:
+            print("\n识别结果:")
+            for result in results:
+                print(f"  区域{result['region_id']}-字符{result['char_id']}: {result['char_type']}, "
+                      f"位置: {result['position']}")
+        else:
+            print("未识别到A和数字")
+        
+        # 保存结果图片
+        output_path = img_dir / "result_a_digit.jpg"
+        cv2.imwrite(str(output_path), result_img)
+        print(f"\n结果图片已保存到: {output_path}")
+
+    print("\n识别完成！")
+
+if __name__ == "__main__":
+    main()
--- a/utils/shoushi_demo.py
+++ b/utils/shoushi_demo.py
@ -118,58 +118,6 @@ class GestureControlNode(Node):
        elif msg.id == 7:  # 停止手势
            self.get_logger().info("检测到停止手势")

-    def complete_loading(self):
-        """完成配货操作"""
-        self.get_logger().info("配货完成，开始运输")
-        
-        # 调用开始运输服务
-        if self.start_transport_cli.wait_for_service(timeout_sec=1.0):
-            req = Trigger.Request()
-            future = self.start_transport_cli.call_async(req)
-            future.add_done_callback(self.transport_start_callback)
-        else:
-            self.get_logger().warn("开始运输服务不可用")
-        
-        # 更新状态
-        self.current_state = "transporting"
-
-    def transport_start_callback(self, future):
-        """运输开始服务回调"""
-        try:
-            response = future.result()
-            if response.success:
-                self.get_logger().info("运输已开始")
-            else:
-                self.get_logger().warn("运输启动失败")
-        except Exception as e:
-            self.get_logger().error(f"运输服务调用失败: {e}")
-
-    def complete_unloading(self):
-        """完成卸货操作"""
-        self.get_logger().info("卸货完成，准备新的配货")
-        
-        # 调用完成卸货服务
-        if self.complete_unloading_cli.wait_for_service(timeout_sec=1.0):
-            req = Trigger.Request()
-            future = self.complete_unloading_cli.call_async(req)
-            future.add_done_callback(self.unloading_complete_callback)
-        else:
-            self.get_logger().warn("完成卸货服务不可用")
-        
-        # 更新状态
-        self.current_state = "waiting_for_loading"
-
-    def unloading_complete_callback(self, future):
-        """卸货完成服务回调"""
-        try:
-            response = future.result()
-            if response.success:
-                self.get_logger().info("卸货已完成确认")
-            else:
-                self.get_logger().warn("卸货完成确认失败")
-        except Exception as e:
-            self.get_logger().error(f"卸货完成服务调用失败: {e}")
-
    def timer_callback(self):
        """定时器回调，用于状态检查和超时处理"""
        # 检查手势识别是否激活且超时
@ -181,7 +129,6 @@ class GestureControlNode(Node):
        # 这里可以添加状态机逻辑，根据实际需求更新current_state
        # 例如，当机器狗到达配货区域时，设置self.current_state = "loading"
        # 当机器狗到达卸货区域时，设置self.current_state = "unloading"
-
    def update_state(self, new_state):
        """更新机器狗状态"""
        old_state = self.current_state