"""Detect low-brightness regions in an image and label each as digit 1 or 2.

Pipeline: BGR -> HSV, inverse-threshold the V channel, clean the mask with
morphological open/close, take external contours, filter them by area and
aspect ratio, compute simple per-region features, and finally classify each
accepted region with hand-tuned projection/density heuristics.
"""
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np

# (x, y, w, h) as returned by cv2.boundingRect.
BoundingBox = Tuple[int, int, int, int]


def preprocess_image(
    image: np.ndarray, v_threshold: int = 120
) -> Tuple[np.ndarray, np.ndarray]:
    """Build a mask of the low-brightness pixels of a BGR image.

    Args:
        image: Image in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2HSV``).
        v_threshold: Pixels whose V value is at or below this become 255 in
            the mask; brighter pixels become 0.

    Returns:
        ``(mask, v_channel)``: the cleaned binary mask and the raw V channel.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    v_channel = hsv[:, :, 2]

    # Inverse threshold: dark pixels are the foreground (255).
    _, binary = cv2.threshold(
        v_channel, v_threshold, 255, cv2.THRESH_BINARY_INV
    )

    # Opening removes small specks, closing then fills small holes.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)

    return cleaned, v_channel


def find_text_regions(
    binary_image: np.ndarray,
    min_area: float = 100,
    min_aspect: float = 0.1,
    max_aspect: float = 10.0,
) -> List[Tuple[np.ndarray, BoundingBox]]:
    """Find external contours that could plausibly be text.

    Args:
        binary_image: Single-channel mask whose foreground is non-zero.
        min_area: Contours whose area is not strictly greater are dropped.
        min_aspect: Exclusive lower bound on bounding-box width / height.
        max_aspect: Exclusive upper bound on bounding-box width / height.

    Returns:
        A list of ``(contour, (x, y, w, h))`` tuples for accepted contours.
    """
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list on all.
    contours = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    regions = []
    for contour in contours:
        if cv2.contourArea(contour) <= min_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        if min_aspect < w / h < max_aspect:
            regions.append((contour, (x, y, w, h)))
    return regions


def analyze_text_region(
    roi: np.ndarray, original_roi: np.ndarray
) -> Dict[str, float]:
    """Compute simple features of one candidate region.

    Args:
        roi: Crop of the binary mask (foreground pixels equal 255).
        original_roi: The same crop taken from the source image; it is fed
            to ``cv2.Canny`` to measure edge density.

    Returns:
        A dict with keys ``black_ratio`` (fraction of mask pixels equal to
        255), ``aspect_ratio`` (width / height), ``edge_density`` (fraction
        of Canny edge pixels) and ``area`` (pixel count of the crop). All
        values are zero for an empty crop.
    """
    height, width = roi.shape[:2]
    total_pixels = height * width
    if total_pixels == 0:
        # An empty crop has no meaningful ratios; avoid dividing by zero.
        return {
            'black_ratio': 0.0,
            'aspect_ratio': 0.0,
            'edge_density': 0.0,
            'area': 0,
        }

    black_ratio = np.count_nonzero(roi == 255) / total_pixels
    edges = cv2.Canny(original_roi, 50, 150)
    edge_density = np.count_nonzero(edges) / total_pixels

    return {
        'black_ratio': float(black_ratio),
        'aspect_ratio': width / height,
        'edge_density': float(edge_density),
        'area': total_pixels,
    }


def classify_digit_1_or_2(roi: np.ndarray) -> int:
    """Guess whether a binary ROI shows the digit 1 or the digit 2.

    The ROI is normalised to 20x20 and scored with hand-tuned thresholds on
    overall density, centre density, row/column projection variance and the
    aspect ratio of the original crop.

    Args:
        roi: Crop of the binary mask (foreground pixels equal 255).

    Returns:
        ``1`` or ``2``.

    Raises:
        ValueError: If ``roi`` is empty.
    """
    height, width = roi.shape[:2]
    if height == 0 or width == 0:
        raise ValueError("roi must be a non-empty image")

    # Measure the aspect ratio BEFORE resizing: after the resize to a fixed
    # 20x20 square it would always be exactly 1.0 and carry no information.
    aspect_ratio = width / height

    # Nearest-neighbour keeps the mask strictly binary. Interpolating
    # filters would create grey values that the `== 255` test drops.
    roi_resized = cv2.resize(roi, (20, 20), interpolation=cv2.INTER_NEAREST)
    foreground = roi_resized == 255

    density = foreground.mean()
    center_density = foreground[5:15, 5:15].mean()
    # Foreground count per row / per column.
    horizontal_variance = np.var(foreground.sum(axis=1))
    vertical_variance = np.var(foreground.sum(axis=0))

    digit1_score = 0
    digit2_score = 0

    # Evidence for "1": flat column profile, sparse, narrow.
    if vertical_variance < 15:
        digit1_score += 2
    if center_density < 0.3:
        digit1_score += 1
    if density < 0.4:
        digit1_score += 1
    if aspect_ratio < 0.8:
        digit1_score += 1

    # Evidence for "2": uneven profiles, denser, wider.
    if vertical_variance > 15:
        digit2_score += 2
    if center_density > 0.3:
        digit2_score += 1
    if density > 0.4:
        digit2_score += 1
    if horizontal_variance > 8:
        digit2_score += 1
    if aspect_ratio > 0.8:
        digit2_score += 1

    if digit1_score > digit2_score:
        return 1
    if digit2_score > digit1_score:
        return 2
    # Tie: fall back to a stricter column-variance threshold.
    return 1 if vertical_variance < 12 else 2


def _save_process_images(
    image_path: Union[str, Path],
    process_images: List[Tuple[str, np.ndarray]],
) -> None:
    """Write each titled intermediate image next to ``image_path`` as JPEG."""
    source = Path(image_path)
    for index, (title, img) in enumerate(process_images, start=1):
        if img.ndim == 2:
            # Convert single-channel images to BGR before saving.
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        output_path = (
            source.parent / f"{source.stem}_process_{index}_{title}.jpg"
        )
        # imwrite reports failure through its return value, not an exception.
        if cv2.imwrite(str(output_path), img):
            print(f"过程图像已保存: {output_path}")
        else:
            print(f"过程图像保存失败: {output_path}")


def detect_text_in_image(
    image_path: Union[str, Path], show_process: bool = True
) -> Tuple[Optional[np.ndarray], List[dict]]:
    """Detect dark regions in an image file and classify each as 1 or 2.

    Args:
        image_path: Path of the image to read.
        show_process: When true, the intermediate images of the pipeline
            are also written next to the input file.

    Returns:
        ``(annotated_image, results)``. ``annotated_image`` is a copy of the
        input with a green box and label per accepted region, or ``None``
        when the file could not be read (``results`` is then empty). Each
        result is a dict with keys ``type``, ``position``, ``digit``,
        ``features`` and ``confidence``.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"无法读取图像: {image_path}")
        # Keep the 2-tuple shape so callers can always unpack the result.
        return None, []

    binary, v_channel = preprocess_image(image)
    valid_text_regions = find_text_regions(binary)

    # Draw on a copy: ROIs below must come from untouched pixels, otherwise
    # boxes drawn for earlier regions leak into later edge measurements.
    annotated = image.copy()
    results = []

    for _contour, (x, y, w, h) in valid_text_regions:
        roi_binary = binary[y:y + h, x:x + w]
        roi_original = image[y:y + h, x:x + w]

        features = analyze_text_region(roi_binary, roi_original)
        is_valid_text_region = (
            features['black_ratio'] > 0.05
            and features['edge_density'] > 0.003
        )
        if not is_valid_text_region:
            continue

        predicted_digit = classify_digit_1_or_2(roi_binary)
        results.append({
            'type': 'digit_region',
            'position': (x, y, w, h),
            'digit': predicted_digit,
            'features': features,
            'confidence': (
                'high' if features['black_ratio'] > 0.3 else 'medium'
            ),
        })

        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Hershey fonts only cover ASCII; non-ASCII text renders as '?'.
        # Clamp the baseline so the label stays inside the image.
        cv2.putText(
            annotated,
            f"Digit {predicted_digit}",
            (x, max(y - 10, 20)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2,
        )

    if show_process:
        # Red boxes mark every candidate contour, accepted or not.
        contour_image = image.copy()
        for _contour, (x, y, w, h) in valid_text_regions:
            cv2.rectangle(
                contour_image, (x, y), (x + w, y + h), (0, 0, 255), 2
            )

        process_images = [
            ("原始图片", image),
            ("HSV-V通道", cv2.applyColorMap(v_channel, cv2.COLORMAP_JET)),
            ("低亮度区域掩码", binary),
            ("轮廓检测", contour_image),
            ("最终结果", annotated),
        ]
        _save_process_images(image_path, process_images)

    return annotated, results


def _print_results(results: List[dict]) -> None:
    """Print a human-readable summary of the detection results."""
    print("\n检测结果:")
    if not results:
        print(" 未检测到文字区域")
        return
    for index, result in enumerate(results, start=1):
        features = result['features']
        print(f" 文字区域 {index}:")
        print(f" 位置: {result['position']}")
        print(f" 识别数字: {result['digit']}")
        print(f" 置信度: {result['confidence']}")
        print(f" 黑色比例: {features['black_ratio']:.2f}")
        print(f" 宽高比: {features['aspect_ratio']:.2f}")
        print(f" 边缘密度: {features['edge_density']:.4f}")
        print(f" 面积: {features['area']}")


def main() -> None:
    """Run the detector on ``imgs/1.jpg`` and save the annotated result."""
    img_dir = Path("imgs")
    img1_path = img_dir / "1.jpg"

    print("开始检测图片中的文字区域...")
    print("=" * 50)

    if not img1_path.exists():
        print(f"图片文件不存在: {img1_path}")
    else:
        print(f"\n检测图片: {img1_path}")
        print("检测过程包括以下步骤:")
        print("1. 原始图片")
        print("2. HSV色彩空间转换")
        print("3. 明度通道提取")
        print("4. 二值化(低亮度区域)")
        print("5. 轮廓检测")
        print("6. 特征分析")
        print("7. 数字识别")
        print("8. 最终结果")
        print("-" * 30)

        result_img1, results1 = detect_text_in_image(
            str(img1_path), show_process=True
        )

        # A None image means the file exists but could not be decoded; the
        # detector has already reported that, so there is nothing to save.
        if result_img1 is not None:
            _print_results(results1)

            output_path1 = img_dir / "result_1.jpg"
            if cv2.imwrite(str(output_path1), result_img1):
                print(f"\n最终结果图片已保存到: {output_path1}")
                print("中间过程图像也已保存到imgs目录")
            else:
                print(f"\n最终结果图片保存失败: {output_path1}")

    print("\n检测完成!")


if __name__ == "__main__":
    main()