mi-task/test/text-image/text_image.py
2025-08-21 17:53:27 +08:00

284 lines
9.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
from pathlib import Path
def preprocess_image(image, v_threshold=120, kernel_size=3):
    """Build a cleaned binary mask of the low-brightness areas of an image.

    The image is converted from BGR to HSV, the V (brightness) channel is
    inverse-thresholded so that dark pixels become 255, and the mask is then
    cleaned with a morphological open (removes small specks) followed by a
    close (fills small holes).

    Args:
        image: Image array in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``cv2.COLOR_BGR2HSV``).
        v_threshold: V-channel cut-off. Pixels whose V value is at or below
            it are set to 255 in the mask, all others to 0. Defaults to 120,
            the value that used to be hard-coded.
        kernel_size: Side length of the square structuring element used by
            the open/close steps. Defaults to 3, the previous fixed size.

    Returns:
        A ``(mask, v_channel)`` tuple: the cleaned binary mask and the raw
        V channel it was derived from.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    v_channel = hsv[:, :, 2]

    # THRESH_BINARY_INV marks the dark pixels (V <= threshold) as foreground.
    _, binary = cv2.threshold(
        v_channel, v_threshold, 255, cv2.THRESH_BINARY_INV
    )

    kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (kernel_size, kernel_size)
    )
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)
    return cleaned, v_channel
def find_text_regions(binary_image, min_area=100):
    """Find contours in a binary mask that could plausibly be text.

    Only external contours are considered. A contour is kept when its area
    exceeds ``min_area`` and the width/height ratio of its bounding
    rectangle lies strictly between 0.1 and 10.

    Args:
        binary_image: Single-channel binary mask to search.
        min_area: Contours whose area is not greater than this are dropped
            as noise. Defaults to 100, the value that used to be hard-coded.

    Returns:
        A list of ``(contour, (x, y, w, h))`` tuples, one per accepted
        contour, where ``(x, y, w, h)`` is the contour's bounding rectangle.
    """
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy). The contour list is second-from-last in
    # both layouts, so index from the end instead of unpacking two values.
    contours = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    valid_text_regions = []
    for contour in contours:
        if cv2.contourArea(contour) <= min_area:
            continue  # too small to be a character

        x, y, w, h = cv2.boundingRect(contour)
        # Reject extremely thin or extremely flat shapes.
        if 0.1 < w / h < 10.0:
            valid_text_regions.append((contour, (x, y, w, h)))
    return valid_text_regions
def analyze_text_region(roi, original_roi):
    """Measure simple features of one candidate region.

    Args:
        roi: Two-dimensional mask patch; pixels equal to 255 are counted as
            foreground.
        original_roi: The matching patch of the source image, used for
            Canny edge detection.

    Returns:
        A dict with four entries:
        ``black_ratio`` (share of ``roi`` pixels equal to 255),
        ``aspect_ratio`` (patch width divided by height),
        ``edge_density`` (share of pixels flagged by Canny with thresholds
        50/150) and ``area`` (number of pixels in the patch).
    """
    dims = roi.shape
    pixel_count = dims[0] * dims[1]

    # Share of the patch covered by mask foreground.
    foreground_share = (roi == 255).sum() / pixel_count

    # Shape of the patch.
    rows, cols = dims
    width_over_height = cols / rows

    # Share of the patch that Canny marks as an edge.
    edge_map = cv2.Canny(original_roi, 50, 150)
    edge_share = (edge_map > 0).sum() / pixel_count

    return {
        'black_ratio': foreground_share,
        'aspect_ratio': width_over_height,
        'edge_density': edge_share,
        'area': pixel_count
    }
def classify_digit_1_or_2(roi):
    """Heuristically decide whether a binary patch shows the digit 1 or 2.

    The patch is resized to 20x20 and several hand-tuned features are
    scored: overall and central foreground density, the variance of the
    row/column projections, and the aspect ratio of the patch.

    Args:
        roi: Binary mask patch in which foreground pixels have value 255.

    Returns:
        ``1`` or ``2``, whichever digit collects the higher score. Ties are
        broken by the variance of the column projection.
    """
    roi_resized = cv2.resize(roi, (20, 20))

    # The aspect ratio has to come from the patch *before* resizing: the
    # resized patch is always 20x20, so its ratio is a constant 1.0, which
    # made the "digit 1 is narrow" rule dead and always favoured digit 2.
    src_height, src_width = roi.shape[:2]
    aspect_ratio = src_width / src_height

    # NOTE(review): cv2.resize defaults to bilinear interpolation, so edge
    # pixels may take values other than 0/255 and are not counted here.
    # Confirm whether nearest-neighbour resizing was intended before
    # changing it, since the thresholds below were tuned with this behaviour.
    foreground = roi_resized == 255

    total_pixels = roi_resized.shape[0] * roi_resized.shape[1]
    density = np.sum(foreground) / total_pixels

    # Foreground count per row and per column.
    horizontal_projection = np.sum(foreground, axis=1)
    vertical_projection = np.sum(foreground, axis=0)

    # Density of the central 10x10 window.
    center_density = np.sum(foreground[5:15, 5:15]) / (10 * 10)

    # Per the original tuning, a "1" gives a low column-projection variance
    # and a "2" a high one.
    vertical_variance = np.var(vertical_projection)
    horizontal_variance = np.var(horizontal_projection)

    digit1_score = 0
    digit2_score = 0

    # Evidence for digit 1.
    if vertical_variance < 15:
        digit1_score += 2
    if center_density < 0.3:
        digit1_score += 1
    if density < 0.4:
        digit1_score += 1
    if aspect_ratio < 0.8:  # a "1" is usually narrow
        digit1_score += 1

    # Evidence for digit 2.
    if vertical_variance > 15:
        digit2_score += 2
    if center_density > 0.3:
        digit2_score += 1
    if density > 0.4:
        digit2_score += 1
    if horizontal_variance > 8:
        digit2_score += 1
    if aspect_ratio > 0.8:  # a "2" is usually wider
        digit2_score += 1

    if digit1_score > digit2_score:
        return 1
    if digit2_score > digit1_score:
        return 2
    # Tie: fall back to a stricter column-variance threshold.
    return 1 if vertical_variance < 12 else 2
def detect_text_in_image(image_path, show_process=True):
    """Detect dark digit-like regions in an image and label each as 1 or 2.

    Args:
        image_path: Location of the image to load (string or path-like).
        show_process: When true, the intermediate stage images are written
            next to the input file as ``<stem>_process_<n>_<title>.jpg``.

    Returns:
        ``(annotated_image, results)``. ``annotated_image`` is a copy of the
        input with a box and label drawn for every accepted region.
        ``results`` is a list of dicts with the keys ``type``, ``position``
        (``(x, y, w, h)``), ``digit``, ``features`` and ``confidence``.
        If the image cannot be read, ``(None, [])`` is returned.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"无法读取图像: {image_path}")
        # Same two-element shape as the success path, so callers that
        # unpack the result do not crash on a failed read.
        return None, []

    binary, v_channel = preprocess_image(image)
    valid_text_regions = find_text_regions(binary)

    # Draw on a copy. Drawing on `image` itself would leak boxes and labels
    # into the patches analysed for later regions and into the "original"
    # stage image saved below.
    annotated = image.copy()

    results = []
    for _contour, (x, y, w, h) in valid_text_regions:
        roi_binary = binary[y:y + h, x:x + w]
        roi_original = image[y:y + h, x:x + w]

        features = analyze_text_region(roi_binary, roi_original)

        # Keep only regions with enough dark pixels and enough edges.
        is_valid_text_region = (
            features['black_ratio'] > 0.05
            and features['edge_density'] > 0.003
        )
        if not is_valid_text_region:
            continue

        predicted_digit = classify_digit_1_or_2(roi_binary)
        results.append({
            'type': 'digit_region',
            'position': (x, y, w, h),
            'digit': predicted_digit,
            'features': features,
            'confidence': 'high' if features['black_ratio'] > 0.3 else 'medium'
        })

        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Hershey fonts cannot render non-ASCII text (it shows up as
        # question marks), so the on-image label is plain ASCII.
        cv2.putText(annotated, f"digit {predicted_digit}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    if show_process:
        # Every candidate box (accepted or not) on a clean copy of the input.
        contour_image = image.copy()
        for _contour, (x, y, w, h) in valid_text_regions:
            cv2.rectangle(
                contour_image, (x, y), (x + w, y + h), (0, 0, 255), 2
            )

        process_images = [
            ("原始图片", image),
            ("HSV-V通道", cv2.applyColorMap(v_channel, cv2.COLORMAP_JET)),
            ("低亮度区域掩码", binary),
            ("轮廓检测", contour_image),
            ("最终结果", annotated),
        ]

        img_dir = Path(image_path).parent
        base_name = Path(image_path).stem
        for index, (title, img) in enumerate(process_images, start=1):
            if len(img.shape) == 2:
                # Single-channel stages are expanded to BGR before saving.
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            output_path = img_dir / f"{base_name}_process_{index}_{title}.jpg"
            # imwrite reports failure through its return value, not an
            # exception, so only claim success when it returns true.
            if cv2.imwrite(str(output_path), img):
                print(f"过程图像已保存: {output_path}")
            else:
                print(f"过程图像保存失败: {output_path}")

    return annotated, results
def main():
    """Run the detector on ``imgs/1.jpg`` and print/save the outcome.

    Prints the list of processing steps, the per-region results, and writes
    the annotated image to ``imgs/result_1.jpg``. If the input file is
    missing or cannot be decoded, nothing is saved.
    """
    img_dir = Path("imgs")
    img1_path = img_dir / "1.jpg"

    print("开始检测图片中的文字区域...")
    print("=" * 50)

    if not img1_path.exists():
        print(f"图片文件不存在: {img1_path}")
    else:
        print(f"\n检测图片: {img1_path}")
        print("检测过程包括以下步骤:")
        for step in (
            "1. 原始图片",
            "2. HSV色彩空间转换",
            "3. 明度通道提取",
            "4. 二值化(低亮度区域)",
            "5. 轮廓检测",
            "6. 特征分析",
            "7. 数字识别",
            "8. 最终结果",
        ):
            print(step)
        print("-" * 30)

        outcome = detect_text_in_image(str(img1_path), show_process=True)
        # The file can exist and still be undecodable. The detector may then
        # hand back an empty result rather than an (image, results) pair, so
        # anything without an image is treated as a failed run.
        if isinstance(outcome, tuple) and len(outcome) == 2:
            result_img1, results1 = outcome
        else:
            result_img1, results1 = None, []

        if result_img1 is not None:
            print("\n检测结果:")
            if results1:
                for i, result in enumerate(results1):
                    features = result['features']
                    print(f" 文字区域 {i+1}:")
                    print(f" 位置: {result['position']}")
                    print(f" 识别数字: {result['digit']}")
                    print(f" 置信度: {result['confidence']}")
                    print(f" 黑色比例: {features['black_ratio']:.2f}")
                    print(f" 宽高比: {features['aspect_ratio']:.2f}")
                    print(f" 边缘密度: {features['edge_density']:.4f}")
                    print(f" 面积: {features['area']}")
            else:
                print(" 未检测到文字区域")

            output_path1 = img_dir / "result_1.jpg"
            # imwrite signals failure via its return value; do not report a
            # save that did not happen.
            if cv2.imwrite(str(output_path1), result_img1):
                print(f"\n最终结果图片已保存到: {output_path1}")
            else:
                print(f"\n最终结果图片保存失败: {output_path1}")
            print("中间过程图像也已保存到imgs目录")

    print("\n检测完成!")


if __name__ == "__main__":
    main()