# mi-task/test/text-image/text_image.py

import cv2
import numpy as np
from pathlib import Path

def preprocess_image(image):
    """
    Build a cleaned mask of the low-brightness pixels of an image.

    The image is converted from BGR to HSV and only the V (brightness)
    plane is used.

    Args:
        image: Image array in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``cv2.COLOR_BGR2HSV``).

    Returns:
        A ``(mask, v_channel)`` tuple: ``mask`` is the binary mask after
        morphological clean-up, ``v_channel`` is the raw V plane.
    """
    value_plane = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)[:, :, 2]

    # Inverted threshold: pixels with V <= 120 become 255, brighter ones 0.
    # The original author expected these dark pixels to be the blue parts
    # of the picture.
    dark_mask = cv2.threshold(value_plane, 120, 255, cv2.THRESH_BINARY_INV)[1]

    # Opening removes small specks, then closing fills small holes; both
    # use the same 3x3 rectangular structuring element.
    square_3x3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        dark_mask = cv2.morphologyEx(dark_mask, morph_op, square_3x3)

    return dark_mask, value_plane

def find_text_regions(binary_image):
    """
    Find contours in a binary mask that could be text regions.

    Only external contours are considered. A contour is kept when its
    area is greater than 100 and the width/height ratio of its bounding
    rectangle lies strictly between 0.1 and 10.0.

    Args:
        binary_image: Binary mask passed to ``cv2.findContours``.

    Returns:
        A list of ``(contour, (x, y, w, h))`` tuples, one per kept contour.
    """
    outlines, _hierarchy = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    candidates = []
    for outline in outlines:
        # Skip regions that are too small to matter.
        if not cv2.contourArea(outline) > 100:
            continue

        left, top, box_w, box_h = cv2.boundingRect(outline)

        # Discard extremely elongated boxes.
        if 0.1 < box_w / box_h < 10.0:
            candidates.append((outline, (left, top, box_w, box_h)))

    return candidates

def analyze_text_region(roi, original_roi):
    """
    Compute simple descriptive features for one candidate region.

    Args:
        roi: Two-dimensional mask crop; pixels equal to 255 are counted
            as foreground.
        original_roi: The matching crop of the source image, used for
            Canny edge detection.

    Returns:
        A dict with the keys ``'black_ratio'`` (share of mask pixels equal
        to 255), ``'aspect_ratio'`` (width / height), ``'edge_density'``
        (share of pixels marked as Canny edges) and ``'area'`` (pixel
        count of the crop).
    """
    rows, cols = roi.shape
    pixel_count = rows * cols

    # Share of the crop covered by the mask (value 255).
    foreground_share = (roi == 255).sum() / pixel_count

    # Share of the crop that Canny (thresholds 50/150) marks as an edge.
    edge_map = cv2.Canny(original_roi, 50, 150)
    edge_share = (edge_map > 0).sum() / pixel_count

    features = {}
    features['black_ratio'] = foreground_share
    features['aspect_ratio'] = cols / rows
    features['edge_density'] = edge_share
    features['area'] = pixel_count
    return features

def classify_digit_1_or_2(roi):
    """
    Decide whether a mask crop looks like the digit 1 or the digit 2.

    The decision is a hand-tuned vote over a few features: projection
    variances, overall and central foreground density, and the aspect
    ratio of the crop. Pixels equal to 255 are treated as foreground.

    Args:
        roi: Mask crop of the candidate region (foreground == 255).

    Returns:
        ``1`` or ``2``.
    """
    # The aspect ratio must be measured on the input crop. After resizing
    # to 20x20 it is always 1.0, which would make the "thin digit 1" test
    # impossible to satisfy and always award digit 2 a point.
    src_height, src_width = roi.shape[:2]
    aspect_ratio = src_width / src_height

    # Normalise the crop to a fixed size so the thresholds below are
    # independent of the region's original size.
    # NOTE(review): cv2.resize defaults to bilinear interpolation, which can
    # produce intermediate gray levels that the `== 255` tests below do not
    # count. Confirm whether that is intended.
    roi_resized = cv2.resize(roi, (20, 20))
    foreground = roi_resized == 255

    # Overall foreground density.
    total_pixels = roi_resized.shape[0] * roi_resized.shape[1]
    density = np.sum(foreground) / total_pixels

    # Foreground count per row and per column.
    horizontal_projection = np.sum(foreground, axis=1)
    vertical_projection = np.sum(foreground, axis=0)

    # Foreground density of the central 10x10 window.
    center_density = np.sum(foreground[5:15, 5:15]) / (10 * 10)

    # Spread of the projections: the scoring rules below treat a low
    # per-column variance as evidence for 1 and a high one as evidence for 2.
    vertical_variance = np.var(vertical_projection)
    horizontal_variance = np.var(horizontal_projection)

    digit1_score = 0
    digit2_score = 0

    # Evidence for digit 1.
    if vertical_variance < 15:
        digit1_score += 2
    if center_density < 0.3:
        digit1_score += 1
    if density < 0.4:
        digit1_score += 1
    if aspect_ratio < 0.8:  # a 1 is usually narrow
        digit1_score += 1

    # Evidence for digit 2.
    if vertical_variance > 15:
        digit2_score += 2
    if center_density > 0.3:
        digit2_score += 1
    if density > 0.4:
        digit2_score += 1
    if horizontal_variance > 8:
        digit2_score += 1
    if aspect_ratio > 0.8:  # a 2 is usually wider
        digit2_score += 1

    if digit1_score > digit2_score:
        return 1
    if digit2_score > digit1_score:
        return 2

    # Tie-break on the per-column variance alone, with a stricter threshold.
    return 1 if vertical_variance < 12 else 2

def _save_process_images(image_path, process_images):
    """Write each ``(title, image)`` pair next to ``image_path`` as a numbered JPEG."""
    img_dir = Path(image_path).parent
    base_name = Path(image_path).stem

    for index, (title, img) in enumerate(process_images, start=1):
        # Single-channel images are expanded to BGR before saving.
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        output_path = img_dir / f"{base_name}_process_{index}_{title}.jpg"
        # cv2.imwrite reports failure through its return value, so only
        # claim success when it actually succeeded.
        if cv2.imwrite(str(output_path), img):
            print(f"过程图像已保存: {output_path}")
        else:
            print(f"过程图像保存失败: {output_path}")


def detect_text_in_image(image_path, show_process=True):
    """
    Detect dark digit regions in an image and classify each as 1 or 2.

    Args:
        image_path: Path of the image to read (``str`` or ``Path``).
        show_process: When true, intermediate images (source image, V
            channel colour map, mask, contour boxes, final result) are
            saved next to the input file.

    Returns:
        An ``(annotated_image, results)`` tuple. ``annotated_image`` is a
        copy of the input with a green box and label drawn for every
        accepted region. ``results`` is a list of dicts with the keys
        ``'type'``, ``'position'``, ``'digit'``, ``'features'`` and
        ``'confidence'``. If the image cannot be read, ``(None, [])`` is
        returned so that callers can always unpack two values.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"无法读取图像: {image_path}")
        return None, []

    # Draw on a separate copy: ROIs analysed later must come from the
    # untouched pixels, and the "original" process image must stay clean.
    annotated = image.copy()

    binary, v_channel = preprocess_image(image)
    valid_text_regions = find_text_regions(binary)

    results = []

    for _contour, (x, y, w, h) in valid_text_regions:
        roi_binary = binary[y:y+h, x:x+w]
        roi_original = image[y:y+h, x:x+w]

        features = analyze_text_region(roi_binary, roi_original)

        # Keep the region only if enough of it is dark and it has some edges.
        is_valid_text_region = (
            features['black_ratio'] > 0.05 and
            features['edge_density'] > 0.003
        )
        if not is_valid_text_region:
            continue

        predicted_digit = classify_digit_1_or_2(roi_binary)

        results.append({
            'type': 'digit_region',
            'position': (x, y, w, h),
            'digit': predicted_digit,
            'features': features,
            'confidence': 'high' if features['black_ratio'] > 0.3 else 'medium'
        })

        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Hershey fonts cannot render non-ASCII text (it is drawn as '?'),
        # so the label is ASCII. Clamp the baseline so labels of regions
        # touching the top edge stay inside the image.
        label_y = max(y - 10, 15)
        cv2.putText(annotated, f"Digit {predicted_digit}", (x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    if show_process:
        # Every candidate region in red, including those rejected above.
        contour_image = image.copy()
        for _contour, (x, y, w, h) in valid_text_regions:
            cv2.rectangle(contour_image, (x, y), (x + w, y + h), (0, 0, 255), 2)

        _save_process_images(image_path, [
            ("原始图片", image),
            ("HSV-V通道", cv2.applyColorMap(v_channel, cv2.COLORMAP_JET)),
            ("低亮度区域掩码", binary),
            ("轮廓检测", contour_image),
            ("最终结果", annotated),
        ])

    return annotated, results

def main():
    """
    Entry point: run digit-region detection on ``imgs/1.jpg`` and report.

    Prints the processing steps, the per-region results, and where the
    annotated image was written. If the file is missing, or exists but
    cannot be decoded, no result image is written.
    """
    img_dir = Path("imgs")
    img1_path = img_dir / "1.jpg"

    print("开始检测图片中的文字区域...")
    print("=" * 50)

    if img1_path.exists():
        print(f"\n检测图片: {img1_path}")
        print("检测过程包括以下步骤：")
        for step in (
            "1. 原始图片",
            "2. HSV色彩空间转换",
            "3. 明度通道提取",
            "4. 二值化（低亮度区域）",
            "5. 轮廓检测",
            "6. 特征分析",
            "7. 数字识别",
            "8. 最终结果",
        ):
            print(step)
        print("-" * 30)

        detection = detect_text_in_image(str(img1_path), show_process=True)

        # An existing file may still be unreadable. In that case the detector
        # prints its own message and returns an empty result or a missing
        # image, so there is nothing to unpack, report, or save.
        if detection and detection[0] is not None:
            result_img1, results1 = detection

            print("\n检测结果:")
            if results1:
                for number, result in enumerate(results1, start=1):
                    features = result['features']
                    print(f"  文字区域 {number}:")
                    print(f"    位置: {result['position']}")
                    print(f"    识别数字: {result['digit']}")
                    print(f"    置信度: {result['confidence']}")
                    print(f"    黑色比例: {features['black_ratio']:.2f}")
                    print(f"    宽高比: {features['aspect_ratio']:.2f}")
                    print(f"    边缘密度: {features['edge_density']:.4f}")
                    print(f"    面积: {features['area']}")
            else:
                print("  未检测到文字区域")

            output_path1 = img_dir / "result_1.jpg"
            # cv2.imwrite signals failure via its return value.
            if cv2.imwrite(str(output_path1), result_img1):
                print(f"\n最终结果图片已保存到: {output_path1}")
            else:
                print(f"\n最终结果图片保存失败: {output_path1}")
            print("中间过程图像也已保存到imgs目录")
    else:
        print(f"图片文件不存在: {img1_path}")

    print("\n检测完成！")

# Run the detection demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()