import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np

# Bounding box in the (x, y, width, height) layout returned by cv2.boundingRect.
Region = Tuple[int, int, int, int]


def detect_white_paper(image: np.ndarray) -> Tuple[List[Region], np.ndarray]:
    """Detect white, paper-like regions in a BGR image.

    The image is converted to HSV and thresholded for high value / low
    saturation pixels. The mask is cleaned with a morphological opening and
    its external contours are filtered by area and aspect ratio.

    Args:
        image: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2HSV``.

    Returns:
        A tuple ``(paper_regions, white_mask)`` where ``paper_regions`` is a
        list of ``(x, y, w, h)`` bounding boxes and ``white_mask`` is the
        opened binary mask the contours were extracted from.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Strict white threshold: any hue, saturation <= 20, value >= 230.
    lower_white = np.array([0, 0, 230])
    upper_white = np.array([180, 20, 255])
    white_mask = cv2.inRange(hsv, lower_white, upper_white)

    # Morphological opening removes small speckle noise from the mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    white_mask = cv2.morphologyEx(white_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(
        white_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    paper_regions = []
    for contour in contours:
        # Discard regions that are too small to be a sheet of paper.
        if cv2.contourArea(contour) <= 5000:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        # The sheet is expected to be landscape: wider than tall, up to 3:1.
        if 1.0 < w / h < 3.0:
            paper_regions.append((x, y, w, h))

    return paper_regions, white_mask


def find_text_regions(roi: np.ndarray) -> Tuple[List[Region], np.ndarray]:
    """Find candidate character regions inside a BGR region of interest.

    Args:
        roi: Image patch passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        A tuple ``(text_regions, binary)`` where ``text_regions`` is a list of
        ``(x, y, w, h)`` boxes relative to ``roi`` and ``binary`` is the
        inverted, closed adaptive-threshold image used for contour detection.
    """
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding copes better with uneven lighting than a global
    # threshold; BINARY_INV makes dark strokes the foreground (255).
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

    # Morphological closing joins strokes that belong to the same character.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    text_regions = []
    for contour in contours:
        # Keep only contours whose area is plausible for a character.
        if not 200 < cv2.contourArea(contour) < 10000:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        # Keep only boxes with a plausible width/height ratio.
        if 0.3 < w / h < 4.0:
            text_regions.append((x, y, w, h))

    return text_regions, binary


def classify_digit(roi: np.ndarray, region: Region) -> str:
    """Classify the character inside ``region`` of ``roi`` as digit 1 or 2.

    The character patch is binarized with Otsu's method (inverted), resized
    to 20x20, and classified with hand-tuned thresholds on overall density,
    variance of the column projection, and density of the central 8x8 window.

    Args:
        roi: BGR image patch containing the character.
        region: ``(x, y, w, h)`` box of the character, relative to ``roi``.

    Returns:
        ``"1"``, ``"2"``, or ``"unknown"`` when no rule matches.
    """
    x, y, w, h = region
    char_roi = roi[y:y + h, x:x + w]

    gray_char = cv2.cvtColor(char_roi, cv2.COLOR_BGR2GRAY)
    _, binary_char = cv2.threshold(
        gray_char, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Normalize to a fixed size so the thresholds below are scale-independent.
    # NOTE(review): cv2.resize defaults to bilinear interpolation, so the
    # resized patch can hold intermediate grey values; only pixels that are
    # exactly 255 count as ink below. The thresholds appear tuned to this
    # behavior - confirm before changing the interpolation mode.
    char_resized = cv2.resize(binary_char, (20, 20))
    ink = char_resized == 255

    total_pixels = char_resized.shape[0] * char_resized.shape[1]
    density = np.sum(ink) / total_pixels

    # Per-column ink count; its variance is used to separate "1" from "2".
    vertical_proj = np.sum(ink, axis=0)
    v_variance = np.var(vertical_proj)

    # Ink density of the central 8x8 window.
    center_density = np.sum(ink[6:14, 6:14]) / (8 * 8)

    # Too little ink to make a decision.
    if density < 0.2:
        return "unknown"

    # Rule for "1": low column-projection variance and a sparse center.
    if v_variance < 8 and center_density < 0.4 and density < 0.5:
        return "1"

    # Rule for "2": high column-projection variance and a dense center.
    if v_variance > 12 and center_density > 0.3 and density > 0.4:
        return "2"

    return "unknown"


def _make_result(
    region_id: int,
    char_id: int,
    char_type: str,
    paper_origin: Tuple[int, int],
    text_region: Region,
) -> Dict[str, Any]:
    """Build one result record, adding absolute image coordinates."""
    px, py = paper_origin
    tx, ty, tw, th = text_region
    return {
        'region_id': region_id,
        'char_id': char_id,
        'char_type': char_type,
        'position': (px + tx, py + ty, tw, th),
        'relative_position': (tx, ty, tw, th),
    }


def find_a_and_digit(
    image_path: str,
) -> Tuple[Optional[np.ndarray], List[Dict[str, Any]]]:
    """Locate the letter "A" and the digit to its right on white paper.

    For every detected paper region, the text regions are scanned from left
    to right. The first region wider than 1.5x its height is taken as "A";
    the first later region that is not wider than that and that
    ``classify_digit`` labels "1" or "2" is taken as the digit. Paper borders
    and recognized characters are drawn onto the image.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        ``(image, results)``. ``image`` is the annotated image, or ``None``
        when the file could not be read. ``results`` is a list of dicts with
        keys ``region_id``, ``char_id``, ``char_type``, ``position`` (absolute
        ``(x, y, w, h)``) and ``relative_position`` (relative to the paper).
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"无法读取图像: {image_path}")
        return None, []

    paper_regions, _ = detect_white_paper(image)
    if not paper_regions:
        print("未检测到白色纸张区域")
        return image, []

    print(f"检测到 {len(paper_regions)} 个白色纸张区域")

    all_results = []

    for i, (x, y, w, h) in enumerate(paper_regions):
        print(f"处理纸张区域 {i+1}: 位置({x}, {y}), 大小({w}x{h})")

        paper_roi = image[y:y + h, x:x + w]

        text_regions, _ = find_text_regions(paper_roi)
        print(f" 在区域 {i+1} 中找到 {len(text_regions)} 个文字区域")

        # Scan characters from left to right.
        text_regions.sort(key=lambda r: r[0])

        a_found = False
        for j, text_region in enumerate(text_regions):
            tx, ty, tw, th = text_region
            is_wide = tw > th * 1.5

            # Simple heuristic: a wide region is taken as "A", a narrower one
            # following it as a digit candidate.
            if is_wide and not a_found:
                a_found = True
                all_results.append(
                    _make_result(i + 1, j + 1, 'A', (x, y), text_region)
                )
                print(f" 找到A: 位置({tx}, {ty}), 大小({tw}x{th})")
            elif not is_wide and a_found:
                digit_type = classify_digit(paper_roi, text_region)
                if digit_type in ('1', '2'):
                    all_results.append(
                        _make_result(
                            i + 1, j + 1, digit_type, (x, y), text_region
                        )
                    )
                    print(
                        f" 找到数字{digit_type}: 位置({tx}, {ty}), 大小({tw}x{th})"
                    )
                    # Only the first digit after "A" is wanted per paper.
                    break

        # Outline the paper region. This happens after the ROI was analysed so
        # the drawn border cannot influence text detection for this region.
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 3)
        cv2.putText(
            image,
            f"Paper {i+1}",
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            (255, 0, 0),
            2,
        )

    # Outline and label every recognized character.
    for result in all_results:
        rx, ry, rw, rh = result['position']
        color = (0, 255, 0) if result['char_type'] != "unknown" else (0, 0, 255)
        cv2.rectangle(image, (rx, ry), (rx + rw, ry + rh), color, 2)
        cv2.putText(
            image,
            result['char_type'],
            (rx, ry - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            color,
            2,
        )

    return image, all_results


def main():
    """Run the A-and-digit recognition on ``imgs/a1.jpg`` and save the result.

    The annotated image is written to ``imgs/result_a_digit.jpg``. Nothing is
    written when the input image does not exist or cannot be decoded.
    """
    img_dir = Path("imgs")
    img_path = img_dir / "a1.jpg"

    print("开始识别图片中的A和右侧数字...")

    if img_path.exists():
        print(f"\n识别图片: {img_path}")
        result_img, results = find_a_and_digit(str(img_path))

        if results:
            print("\n识别结果:")
            for result in results:
                print(
                    f" 区域{result['region_id']}-字符{result['char_id']}: "
                    f"{result['char_type']}, "
                    f"位置: {result['position']}"
                )
        else:
            print("未识别到A和数字")

        # find_a_and_digit returns None for the image when the file exists but
        # cannot be decoded; cv2.imwrite would raise on a None image.
        if result_img is not None:
            output_path = img_dir / "result_a_digit.jpg"
            # cv2.imwrite reports failure through its return value.
            if cv2.imwrite(str(output_path), result_img):
                print(f"\n结果图片已保存到: {output_path}")
            else:
                print(f"\n结果图片保存失败: {output_path}")

    print("\n识别完成!")


if __name__ == "__main__":
    main()