mi-task/test/text-image/text_image.py

284 lines
9.3 KiB
Python
Raw Normal View History

2025-08-21 12:51:13 +08:00
import cv2
import numpy as np
from pathlib import Path
2025-08-21 14:04:38 +08:00
def preprocess_image(image):
    """Build a cleaned mask of the low-brightness pixels of an image.

    The image is converted from BGR to HSV, its V (brightness) channel is
    thresholded with an inverted binary threshold at 120, and the resulting
    mask is tidied with a 3x3 rectangular opening followed by a closing.

    Args:
        image: Array accepted by ``cv2.cvtColor`` with ``cv2.COLOR_BGR2HSV``.

    Returns:
        A ``(mask, v_channel)`` tuple: the cleaned binary mask and the V
        channel it was derived from.
    """
    # Brightness plane of the HSV representation.
    value_plane = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)[:, :, 2]

    # Inverted threshold: pixels with V <= 120 become 255, brighter ones 0.
    dark_mask = cv2.threshold(
        value_plane, 120, 255, cv2.THRESH_BINARY_INV
    )[1]

    # Opening removes small specks; closing afterwards fills small holes.
    struct_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        dark_mask = cv2.morphologyEx(dark_mask, operation, struct_elem)

    return dark_mask, value_plane
2025-08-21 12:51:13 +08:00
2025-08-21 17:53:27 +08:00
def find_text_regions(binary_image):
    """Collect the external contours that pass the size and shape filters.

    A contour is kept when its area is greater than 100 and the width/height
    ratio of its bounding rectangle lies strictly between 0.1 and 10.0.

    Args:
        binary_image: Mask handed to ``cv2.findContours``.

    Returns:
        A list of ``(contour, (x, y, w, h))`` tuples, in the order the
        contours were reported.
    """
    outlines, _hierarchy = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    candidates = []
    for outline in outlines:
        # Skip regions that are too small to be of interest.
        if not cv2.contourArea(outline) > 100:
            continue

        left, top, box_w, box_h = cv2.boundingRect(outline)

        # Keep only boxes whose proportions are neither extremely flat nor
        # extremely tall.
        if 0.1 < box_w / box_h < 10.0:
            candidates.append((outline, (left, top, box_w, box_h)))

    return candidates
2025-08-21 12:51:13 +08:00
2025-08-21 17:53:27 +08:00
def analyze_text_region(roi, original_roi):
    """Compute simple descriptive features for one candidate region.

    Args:
        roi: Two-dimensional mask crop; pixels equal to 255 are counted as
            foreground. In the visible caller this is a crop of the inverted
            brightness mask, so 255 marks the dark pixels.
        original_roi: The matching crop of the source image, used for Canny
            edge detection (thresholds 50 and 150).

    Returns:
        A dict with the keys ``black_ratio`` (share of pixels equal to 255),
        ``aspect_ratio`` (width / height), ``edge_density`` (share of Canny
        edge pixels relative to the mask area) and ``area`` (pixel count of
        the mask crop).
    """
    rows, cols = roi.shape
    pixel_count = rows * cols

    # Share of mask pixels that are set.
    foreground_share = (roi == 255).sum() / pixel_count

    # Shape of the crop.
    proportions = cols / rows

    # Edge strength: how many pixels Canny marks as edges.
    edge_map = cv2.Canny(original_roi, 50, 150)
    edge_share = (edge_map > 0).sum() / pixel_count

    return {
        'black_ratio': foreground_share,
        'aspect_ratio': proportions,
        'edge_density': edge_share,
        'area': pixel_count,
    }
2025-08-21 12:51:13 +08:00
2025-08-21 17:53:27 +08:00
def classify_digit_1_or_2(roi):
    """Guess whether a binary glyph mask shows the digit 1 or the digit 2.

    The mask is normalised to 20x20 and a handful of hand-tuned features
    (ink density, centre density, row/column projection variance, and the
    aspect ratio of the crop) vote for either digit.

    Args:
        roi: Mask crop of the glyph, with foreground pixels set to 255.

    Returns:
        ``1`` or ``2``. Ties are broken on the column-projection variance
        (below 12 means ``1``).
    """
    # The aspect ratio has to be taken from the crop as given: once resized,
    # every ROI is 20x20 and the ratio would always be exactly 1.0, which
    # disables the "digit 1 is thin" vote and hands digit 2 a free point.
    src_height, src_width = roi.shape[:2]
    aspect_ratio = src_width / src_height

    # Normalise the glyph to a fixed size so the thresholds below apply.
    roi_resized = cv2.resize(roi, (20, 20))

    # Resizing interpolates, so stroke borders end up at intermediate gray
    # levels. Re-binarise at mid intensity instead of testing for exactly
    # 255, which would silently drop those border pixels.
    ink = roi_resized > 127

    # Overall share of ink pixels.
    density = ink.sum() / ink.size

    # Ink per row and per column.
    horizontal_projection = ink.sum(axis=1)
    vertical_projection = ink.sum(axis=0)

    # Share of ink in the central 10x10 window.
    center_density = ink[5:15, 5:15].sum() / (10 * 10)

    # A "1" is mostly one vertical stroke, so its column profile is expected
    # to vary little; a "2" spreads ink more unevenly.
    vertical_variance = np.var(vertical_projection)
    horizontal_variance = np.var(horizontal_projection)

    # (condition, weight) votes for each digit.
    digit1_votes = (
        (vertical_variance < 15, 2),
        (center_density < 0.3, 1),
        (density < 0.4, 1),
        (aspect_ratio < 0.8, 1),
    )
    digit2_votes = (
        (vertical_variance > 15, 2),
        (center_density > 0.3, 1),
        (density > 0.4, 1),
        (horizontal_variance > 8, 1),
        (aspect_ratio > 0.8, 1),
    )
    digit1_score = sum(weight for hit, weight in digit1_votes if hit)
    digit2_score = sum(weight for hit, weight in digit2_votes if hit)

    if digit1_score > digit2_score:
        return 1
    if digit2_score > digit1_score:
        return 2

    # Equal scores: fall back to a stricter variance cut-off.
    return 1 if vertical_variance < 12 else 2
def detect_text_in_image(image_path, show_process=True):
    """Detect dark digit-like regions in an image file and label them.

    The image is read, reduced to a low-brightness mask, and every candidate
    region is analysed. Regions with enough mask pixels and enough edges are
    classified as digit 1 or 2 and drawn onto an annotated copy of the image.

    Args:
        image_path: Path of the image to read.
        show_process: When true, the intermediate stages are written next to
            the input image as ``<stem>_process_<n>_<title>.jpg``.

    Returns:
        ``(annotated_image, results)``. ``results`` is a list of dicts with
        the keys ``type``, ``position``, ``digit``, ``features`` and
        ``confidence``. If the image cannot be read the function returns
        ``(None, [])`` so callers can always unpack two values.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"无法读取图像: {image_path}")
        # Same two-value shape as the success path.
        return None, []

    binary, v_channel = preprocess_image(image)
    valid_text_regions = find_text_regions(binary)

    # Draw on a copy: ROIs for later regions must be cropped from untouched
    # pixels, otherwise earlier rectangles/labels leak into the edge analysis.
    annotated = image.copy()
    results = []

    for _contour, (x, y, w, h) in valid_text_regions:
        roi_binary = binary[y:y + h, x:x + w]
        roi_original = image[y:y + h, x:x + w]

        features = analyze_text_region(roi_binary, roi_original)

        # Require a minimum share of mask pixels and of edge pixels.
        if not (features['black_ratio'] > 0.05
                and features['edge_density'] > 0.003):
            continue

        predicted_digit = classify_digit_1_or_2(roi_binary)
        results.append({
            'type': 'digit_region',
            'position': (x, y, w, h),
            'digit': predicted_digit,
            'features': features,
            'confidence': 'high' if features['black_ratio'] > 0.3 else 'medium',
        })

        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Hershey fonts cannot render non-ASCII text (it shows up as "?"),
        # so the on-image label is ASCII. Clamp the baseline so labels of
        # regions touching the top border stay inside the image.
        cv2.putText(annotated, f"digit {predicted_digit}",
                    (x, max(y - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    if show_process:
        # Candidate boxes are shown on the untouched image.
        contour_image = image.copy()
        for _contour, (x, y, w, h) in valid_text_regions:
            cv2.rectangle(contour_image, (x, y), (x + w, y + h),
                          (0, 0, 255), 2)

        _save_process_images(image_path, [
            ("原始图片", image),
            ("HSV-V通道", cv2.applyColorMap(v_channel, cv2.COLORMAP_JET)),
            ("低亮度区域掩码", binary),
            ("轮廓检测", contour_image),
            ("最终结果", annotated),
        ])

    return annotated, results


def _save_process_images(image_path, stages):
    """Write each ``(title, image)`` stage beside ``image_path`` as a JPEG."""
    source = Path(image_path)
    for index, (title, stage) in enumerate(stages, start=1):
        if stage.ndim == 2:
            # Single-channel stages are expanded to BGR before saving.
            stage = cv2.cvtColor(stage, cv2.COLOR_GRAY2BGR)

        output_path = source.parent / f"{source.stem}_process_{index}_{title}.jpg"
        # cv2.imwrite reports failure through its return value.
        if cv2.imwrite(str(output_path), stage):
            print(f"过程图像已保存: {output_path}")
        else:
            print(f"过程图像保存失败: {output_path}")
2025-08-21 12:51:13 +08:00
def main():
    """Run the detection demo on ``imgs/1.jpg`` and print a report.

    Prints the processing steps, runs ``detect_text_in_image`` with the
    intermediate stages enabled, lists every detected region, and writes the
    annotated image to ``imgs/result_1.jpg``. A missing or unreadable input
    file is reported instead of raising.
    """
    img_dir = Path("imgs")
    img1_path = img_dir / "1.jpg"

    print("开始检测图片中的文字区域...")
    print("=" * 50)

    if img1_path.exists():
        print(f"\n检测图片: {img1_path}")
        print("检测过程包括以下步骤:")
        for step in (
            "1. 原始图片",
            "2. HSV色彩空间转换",
            "3. 明度通道提取",
            "4. 二值化(低亮度区域)",
            "5. 轮廓检测",
            "6. 特征分析",
            "7. 数字识别",
            "8. 最终结果",
        ):
            print(step)
        print("-" * 30)

        outcome = detect_text_in_image(str(img1_path), show_process=True)

        # A failed read may be reported as an empty result or as a pair whose
        # image is None; neither can be unpacked and saved.
        if not outcome or outcome[0] is None:
            print(f"图片读取失败: {img1_path}")
        else:
            result_img1, results1 = outcome

            print("\n检测结果:")
            if results1:
                for number, result in enumerate(results1, start=1):
                    features = result['features']
                    print(f" 文字区域 {number}:")
                    print(f" 位置: {result['position']}")
                    print(f" 识别数字: {result['digit']}")
                    print(f" 置信度: {result['confidence']}")
                    print(f" 黑色比例: {features['black_ratio']:.2f}")
                    print(f" 宽高比: {features['aspect_ratio']:.2f}")
                    print(f" 边缘密度: {features['edge_density']:.4f}")
                    print(f" 面积: {features['area']}")
            else:
                print(" 未检测到文字区域")

            # Save the annotated image; imwrite returns False on failure.
            output_path1 = img_dir / "result_1.jpg"
            if cv2.imwrite(str(output_path1), result_img1):
                print(f"\n最终结果图片已保存到: {output_path1}")
                print("中间过程图像也已保存到imgs目录")
            else:
                print(f"\n最终结果图片保存失败: {output_path1}")
    else:
        print(f"图片文件不存在: {img1_path}")

    print("\n检测完成!")
2025-08-21 12:51:13 +08:00
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()