# mi-task/test/text-image/text_image.py

import cv2
import numpy as np
import os
from pathlib import Path

def detect_white_paper(image):
    """
    Locate white, paper-like regions in a BGR image.

    Args:
        image: Image array in BGR channel order (it is converted with
            cv2.COLOR_BGR2HSV).

    Returns:
        A tuple ``(paper_regions, white_mask)``. ``paper_regions`` is a list
        of ``(x, y, w, h)`` bounding boxes whose contour area exceeds 5000
        and whose width/height ratio lies strictly between 1.0 and 3.0.
        ``white_mask`` is the white mask after morphological opening.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Strict white band: any hue, saturation at most 20, value at least 230.
    white_mask = cv2.inRange(
        hsv_image,
        np.array([0, 0, 230]),
        np.array([180, 20, 255]),
    )

    # Morphological opening with a 5x5 rectangle removes small noise specks.
    opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    white_mask = cv2.morphologyEx(white_mask, cv2.MORPH_OPEN, opening_kernel)

    outlines, _ = cv2.findContours(
        white_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Keep only large blobs, then only landscape-shaped ones (paper is
    # expected to be wider than tall).
    large_boxes = (
        cv2.boundingRect(outline)
        for outline in outlines
        if cv2.contourArea(outline) > 5000
    )
    paper_regions = [
        (x, y, w, h)
        for x, y, w, h in large_boxes
        if 1.0 < w / h < 3.0
    ]

    return paper_regions, white_mask

def find_text_regions(roi):
    """
    Find candidate character regions inside a BGR region of interest.

    Args:
        roi: Image array in BGR channel order (it is converted with
            cv2.COLOR_BGR2GRAY).

    Returns:
        A tuple ``(text_regions, binary)``. ``text_regions`` is a list of
        ``(x, y, w, h)`` bounding boxes, relative to ``roi``, whose contour
        area lies strictly between 200 and 10000 and whose width/height
        ratio lies strictly between 0.3 and 4.0. ``binary`` is the inverted
        adaptive-threshold image after morphological closing.
    """
    grayscale = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Adaptive Gaussian thresholding copes better with uneven lighting than
    # a single global threshold; the result is inverted (THRESH_BINARY_INV).
    binary = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

    # Closing with a 2x2 rectangle joins broken character strokes.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    outlines, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    text_regions = []
    for outline in outlines:
        # Discard blobs outside the plausible character-size range.
        if not 200 < cv2.contourArea(outline) < 10000:
            continue
        x, y, w, h = cv2.boundingRect(outline)
        # Discard extremely thin or extremely wide boxes.
        if 0.3 < w / h < 4.0:
            text_regions.append((x, y, w, h))

    return text_regions, binary

def classify_digit(roi, region):
    """
    Classify a character crop as the digit "1" or "2" with shape heuristics.

    The crop is binarised (Otsu, inverted), resized to 20x20, and three
    features are measured on the pixels equal to 255: overall density,
    variance of the per-column pixel counts, and density of the central
    8x8 window.

    Args:
        roi: BGR image array that contains the character.
        region: ``(x, y, w, h)`` bounding box of the character inside ``roi``.

    Returns:
        ``"1"``, ``"2"``, or ``"unknown"`` when the features match neither
        digit or the crop is empty.
    """
    x, y, w, h = region

    # Crop the character out of the surrounding region.
    char_roi = roi[y:y+h, x:x+w]

    # A zero-sized crop (zero width/height, or a box outside roi) cannot be
    # colour-converted or resized, so report it as unrecognised instead of
    # letting OpenCV raise.
    if char_roi.size == 0:
        return "unknown"

    # Grayscale, then inverted Otsu binarisation.
    gray_char = cv2.cvtColor(char_roi, cv2.COLOR_BGR2GRAY)
    _, binary_char = cv2.threshold(gray_char, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Normalise to a fixed size so the thresholds below are scale independent.
    char_resized = cv2.resize(binary_char, (20, 20))

    # Boolean map of pixels that are exactly 255 after resizing.
    ink = char_resized == 255

    # Fraction of the 20x20 grid covered by those pixels.
    total_pixels = char_resized.shape[0] * char_resized.shape[1]
    density = np.sum(ink) / total_pixels

    # Variance of the per-column counts (vertical projection).
    vertical_proj = np.sum(ink, axis=0)
    v_variance = np.var(vertical_proj)

    # Density inside the central 8x8 window.
    center_density = np.sum(ink[6:14, 6:14]) / (8 * 8)

    # Too few marked pixels to judge.
    if density < 0.2:
        return "unknown"

    # "1": low column-count variance, sparse centre, moderate density.
    if v_variance < 8 and center_density < 0.4 and density < 0.5:
        return "1"

    # "2": high column-count variance, denser centre, higher density.
    if v_variance > 12 and center_density > 0.3 and density > 0.4:
        return "2"

    return "unknown"

def find_a_and_digit(image_path):
    """
    Find the letter "A" and the digit to its right on each white paper region.

    Args:
        image_path: Path of the image file, passed to cv2.imread.

    Returns:
        A tuple ``(image, results)``. ``image`` is the loaded image with the
        paper and character boxes drawn on it, or ``None`` when the file
        could not be read. ``results`` is a list of dicts with the keys
        ``'region_id'``, ``'char_id'``, ``'char_type'``, ``'position'``
        (``(x, y, w, h)`` in full-image coordinates) and
        ``'relative_position'`` (``(x, y, w, h)`` inside the paper region);
        it is empty when nothing was recognised.
    """
    # Load the image.
    image = cv2.imread(image_path)
    if image is None:
        print(f"无法读取图像: {image_path}")
        return None, []

    # Detect white paper regions (the mask itself is not needed here).
    paper_regions, _ = detect_white_paper(image)

    if not paper_regions:
        print("未检测到白色纸张区域")
        return image, []

    print(f"检测到 {len(paper_regions)} 个白色纸张区域")

    # Analyse an untouched copy: borders and labels are drawn on `image`
    # while the loop is still running, and those annotations must not leak
    # into the ROI of a later paper region and be mistaken for text.
    pristine = image.copy()

    all_results = []

    # Process every paper region.
    for i, paper_region in enumerate(paper_regions):
        x, y, w, h = paper_region

        print(f"处理纸张区域 {i+1}: 位置({x}, {y}), 大小({w}x{h})")

        # Crop the paper from the unannotated copy.
        paper_roi = pristine[y:y+h, x:x+w]

        # Find candidate character boxes inside the paper.
        text_regions, _ = find_text_regions(paper_roi)

        print(f"  在区域 {i+1} 中找到 {len(text_regions)} 个文字区域")

        # Scan characters from left to right.
        text_regions.sort(key=lambda r: r[0])

        a_found = False
        digit_found = False

        for j, text_region in enumerate(text_regions):
            tx, ty, tw, th = text_region

            # Simple heuristic: a box more than 1.5x wider than tall is taken
            # as the "A"; narrower boxes after it are digit candidates.
            if tw > th * 1.5 and not a_found:
                a_found = True
                result = {
                    'region_id': i + 1,
                    'char_id': j + 1,
                    'char_type': 'A',
                    'position': (x + tx, y + ty, tw, th),
                    'relative_position': (tx, ty, tw, th)
                }
                all_results.append(result)
                print(f"    找到A: 位置({tx}, {ty}), 大小({tw}x{th})")

            elif tw <= th * 1.5 and a_found and not digit_found:
                # Classify the candidate; only "1" and "2" are accepted.
                digit_type = classify_digit(paper_roi, text_region)
                if digit_type in ['1', '2']:
                    digit_found = True
                    result = {
                        'region_id': i + 1,
                        'char_id': j + 1,
                        'char_type': digit_type,
                        'position': (x + tx, y + ty, tw, th),
                        'relative_position': (tx, ty, tw, th)
                    }
                    all_results.append(result)
                    print(f"    找到数字{digit_type}: 位置({tx}, {ty}), 大小({tw}x{th})")
                    # Only the first recognised digit per paper is reported.
                    break

        # Outline and label the paper on the output image.
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 3)
        cv2.putText(image, f"Paper {i+1}", (x, y - 10),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

    # Outline and label every recognised character.
    for result in all_results:
        rx, ry, rw, rh = result['position']
        color = (0, 255, 0) if result['char_type'] != "unknown" else (0, 0, 255)
        cv2.rectangle(image, (rx, ry), (rx + rw, ry + rh), color, 2)
        cv2.putText(image, result['char_type'], (rx, ry - 5),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return image, all_results

def main():
    """
    Entry point: run the "A" + digit recognition on ``imgs/a1.jpg``.

    Prints the recognition results and, when an annotated image is
    available, writes it to ``imgs/result_a_digit.jpg``. Nothing is
    processed when the input file does not exist.
    """
    # Input image location.
    img_dir = Path("imgs")
    img_path = img_dir / "a1.jpg"

    print("开始识别图片中的A和右侧数字...")

    # Run the recognition only when the input file is present.
    if img_path.exists():
        print(f"\n识别图片: {img_path}")
        result_img, results = find_a_and_digit(str(img_path))

        if results:
            print("\n识别结果:")
            for result in results:
                print(f"  区域{result['region_id']}-字符{result['char_id']}: {result['char_type']}, "
                      f"位置: {result['position']}")
        else:
            print("未识别到A和数字")

        # find_a_and_digit returns None for the image when the file cannot
        # be read; there is nothing to save in that case, and handing None
        # to cv2.imwrite would fail.
        if result_img is not None:
            output_path = img_dir / "result_a_digit.jpg"
            cv2.imwrite(str(output_path), result_img)
            print(f"\n结果图片已保存到: {output_path}")

    print("\n识别完成！")

# Run the recognition demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()