.NET 6 实现基于 PaddleOCR + OpenCV 的 OCR API

最近查阅资料，研究如何通过 .NET 6 封装一个 OCR API 供自己的项目调用。期间了解到 Google 的 Tesseract，尝试之后发现它对中文的识别结果达不到预期；最后发现百度的开源项目 PaddleOCR 能满足需求，因此做了如下尝试。

新建解决方案OCRDemo,并创建Web Api项目OCR.API,类库项目OCR.Lib以及OCR.Application,项目结构如下:

PaddleOCR 和 OpenCV 在 NuGet 上都有封装好的包，直接安装上图所示的 NuGet 包即可。其中个别包的名称中带有 centos7-x64 或 win，是为了分别适配不同平台的部署。

安装好之后,在项目OCR.Application中创建接口IOCRService.cs

public interface IOCRService
{
    /// <summary>
    /// Runs text recognition (OCR) on a picture.
    /// </summary>
    /// <param name="picStream">Stream containing the encoded picture to recognize.</param>
    /// <param name="resizeLine">
    /// Resize target for the picture before recognition.
    /// NOTE(review): presumably the desired width + height in pixels — confirm against the implementation.
    /// </param>
    /// <param name="cancellationToken">Token that can be used to cancel the recognition.</param>
    /// <returns>A task producing the recognition result.</returns>
    Task<OCRResult> RunAsync(Stream picStream, int resizeLine, CancellationToken cancellationToken);
}

以及IRotationService.cs

public interface IRotationService
{
    /// <summary>
    /// Rotation detection for a picture.
    /// </summary>
    /// <param name="picStream">Stream containing the encoded picture to inspect.</param>
    /// <returns>
    /// A memory stream holding the resulting picture.
    /// NOTE(review): presumably the picture with its rotation corrected — confirm against the implementation.
    /// </returns>
    MemoryStream RotationCheck(Stream picStream);
}

创建OCRResult.cs,定义图形识别之后的返回结果

/// <summary>
/// Result returned to API callers after one recognition run.
/// </summary>
public class OCRResult
{
    /// <summary>
    /// Individual recognized regions. Defaults to an empty list so that a
    /// freshly constructed result can be enumerated without a null check.
    /// </summary>
    public List<OCRRegion> Regions { get; set; } = new();

    /// <summary>
    /// Text of the whole picture. Defaults to an empty string rather than null.
    /// </summary>
    public string Text { get; set; } = string.Empty;
}

/// <summary>
/// Width and height of a recognized region.
/// </summary>
public class OCRSize
{
    /// <summary>Height of the region.</summary>
    public double Height { get; set; }

    /// <summary>Width of the region.</summary>
    public double Width { get; set; }
}

/// <summary>
/// A point expressed as X/Y coordinates.
/// </summary>
public class OCRPosition
{
    /// <summary>Horizontal coordinate.</summary>
    public double X { get; set; }

    /// <summary>Vertical coordinate.</summary>
    public double Y { get; set; }
}

/// <summary>
/// Rotated rectangle describing where a region sits in the picture.
/// </summary>
public class OCRRect
{
    /// <summary>
    /// Rotation angle of the rectangle.
    /// NOTE(review): unit is not established here (OpenCV rotated rects use degrees) — confirm.
    /// </summary>
    public double Angle { get; set; }

    /// <summary>
    /// Size of the rectangle. Defaults to a zero-sized instance instead of null.
    /// </summary>
    public OCRSize Size { get; set; } = new();

    /// <summary>
    /// Center point of the rectangle. Defaults to (0, 0) instead of null.
    /// </summary>
    public OCRPosition Center { get; set; } = new();
}

/// <summary>
/// One recognized text region of the picture.
/// </summary>
public class OCRRegion
{
    /// <summary>
    /// Score reported by the OCR engine for this region
    /// (presumably a recognition confidence — confirm against the engine's documentation).
    /// </summary>
    public double Score { get; set; }

    /// <summary>
    /// Location of the region. Defaults to an empty rectangle instead of null.
    /// </summary>
    public OCRRect Rect { get; set; } = new();

    /// <summary>
    /// Text recognized inside the region. Defaults to an empty string rather than null.
    /// </summary>
    public string Text { get; set; } = string.Empty;
}

OCR.Lib项目中创建类Extensions.OpenCV.cs,实现OpenCV的部分图形预处理扩展方法

/// <summary>
/// OpenCV image pre-processing helpers used before recognition.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Scales an image proportionally so that the width + height of the result
    /// is (up to integer truncation) equal to <paramref name="lineSize"/>.
    /// The image is always scaled to that total, so smaller images are enlarged.
    /// </summary>
    /// <param name="src">Image to scale; it is not modified.</param>
    /// <param name="lineSize">Target value for width + height of the result, in pixels. Must be positive.</param>
    /// <returns>A new <see cref="Mat"/> owned by the caller, who must dispose it.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="src"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="lineSize"/> is zero or negative.</exception>
    /// <exception cref="System.ArgumentException"><paramref name="src"/> has no pixels.</exception>
    public static Mat SrcResize(this Mat src, int lineSize)
    {
        System.ArgumentNullException.ThrowIfNull(src);
        if (lineSize <= 0)
        {
            throw new System.ArgumentOutOfRangeException(nameof(lineSize), lineSize, "The resize target must be positive.");
        }

        int total = src.Width + src.Height;
        if (total <= 0)
        {
            // An undecodable picture yields an empty Mat; fail clearly instead of dividing by zero.
            throw new System.ArgumentException("The source image is empty.", nameof(src));
        }

        // Keep the aspect ratio: each side gets its share of lineSize.
        // Truncate toward zero, but never below one pixel, because Cv2.Resize rejects a zero-sized target.
        int targetWidth = System.Math.Max(1, (int)(src.Width / (double)total * lineSize));
        int targetHeight = System.Math.Max(1, (int)(src.Height / (double)total * lineSize));

        // Resize straight into the Mat that is returned; no intermediate copy is needed.
        var resized = new Mat();
        try
        {
            Cv2.Resize(src, resized, new Size(targetWidth, targetHeight));
            return resized;
        }
        catch
        {
            resized.Dispose();
            throw;
        }
    }

    // Edge detection

    // Dilation

    // Erosion

    // Binarization

    // ...
}

接着创建RotationService.cs实现IRotationService旋转检测并矫正旋转角度

/// <summary>
/// Detects the rotation of a picture and returns it with the rotation corrected.
/// </summary>
public class RotationService : IRotationService
{
    /// <summary>
    /// Decodes the picture, runs rotation detection on it and, when a rotation
    /// other than 0 is reported, restores the upright orientation.
    /// </summary>
    /// <param name="picStream">Stream containing the encoded picture.</param>
    /// <returns>A new memory stream holding the re-encoded (and, if needed, corrected) picture.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="picStream"/> is null.</exception>
    public MemoryStream RotationCheck(Stream picStream)
    {
        System.ArgumentNullException.ThrowIfNull(picStream);

        using Mat src = Mat.FromStream(picStream, ImreadModes.Color);

        // NOTE(review): a detector is created for every call; if construction turns out to be
        // expensive, consider sharing one instance — confirm its thread-safety first.
        using PaddleRotationDetector detector = new PaddleRotationDetector(RotationDetectionModel.EmbeddedDefault);
        RotationResult detection = detector.Run(src);

        // Encode the Mat returned by the restore call directly; cloning it first only
        // duplicated the whole image. The returned Mat is not disposed separately because
        // an in-place restore hands back an image already owned by `src`.
        Mat upright = detection.Rotation != RotationDegree._0
            ? detection.RestoreRotationInPlace(src)
            : src;

        return upright.ToMemoryStream();
    }
}

创建OCRService.cs实现IOCRService完成图形字符识别

/// <summary>
/// Character recognition backed by a queued PaddleOCR engine.
/// </summary>
public class OCRService : IOCRService
{
    private readonly QueuedPaddleOcrAll _paddleOcr;

    public OCRService(QueuedPaddleOcrAll paddleOcr) => _paddleOcr = paddleOcr;

    /// <summary>
    /// Decodes the picture, scales it proportionally and runs recognition on the scaled image.
    /// </summary>
    /// <param name="picStream">Stream containing the encoded picture.</param>
    /// <param name="resizeLine">Value handed to the proportional resize helper before recognition.</param>
    /// <param name="cancellationToken">Token forwarded to the OCR engine.</param>
    /// <returns>The full text plus one entry per recognized region.</returns>
    public async Task<OCRResult> RunAsync(Stream picStream, int resizeLine, CancellationToken cancellationToken)
    {
        // Decode the uploaded picture.
        using Mat original = Mat.FromStream(picStream, ImreadModes.Color);
        // Scale it proportionally before recognition.
        using Mat scaled = original.SrcResize(resizeLine);

        var recognized = await _paddleOcr.Run(scaled, cancellationToken: cancellationToken);

        // Copy every engine region into the API's own DTOs.
        var mapped = new List<OCRRegion>();
        foreach (var region in recognized.Regions)
        {
            var box = region.Rect;

            var center = new OCRPosition { X = box.Center.X, Y = box.Center.Y };
            var size = new OCRSize { Height = box.Size.Height, Width = box.Size.Width };
            var rect = new OCRRect { Angle = box.Angle, Center = center, Size = size };

            mapped.Add(new OCRRegion { Rect = rect, Score = region.Score, Text = region.Text });
        }

        var result = new OCRResult();
        result.Text = recognized.Text;
        result.Regions = mapped;
        return result;
    }
}

创建Startup.cs通过DI注入服务

/// <summary>
/// Dependency-injection registration for the OCR library.
/// </summary>
public static class Startup
{
    /// <summary>
    /// Registers the OCR and rotation services (transient) together with the shared PaddleOCR engine.
    /// </summary>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <param name="config">Application configuration; the "PaddleDevice" entry selects the device.</param>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddOcrLib(this IServiceCollection services, IConfiguration config)
    {
        return services
            .AddTransient<IOCRService, OCRService>()
            .AddTransient<IRotationService, RotationService>()
            .AddPaddleOCR(config);
    }


    /// <summary>
    /// Registers a single shared <see cref="QueuedPaddleOcrAll"/> instance.
    /// </summary>
    /// <param name="services">Service collection to add the registration to.</param>
    /// <param name="config">
    /// Application configuration. When "PaddleDevice" equals "GPU" (ignoring case and
    /// surrounding whitespace) the GPU device is used; any other or missing value selects MKL-DNN.
    /// </param>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddPaddleOCR(this IServiceCollection services, IConfiguration config)
    {
        return services.AddSingleton(_ =>
        {
            // GPU or CPU. Compared case-insensitively so that "gpu" / "Gpu" do not
            // silently fall back to the CPU path.
            bool useGpu = string.Equals(
                config.GetSection("PaddleDevice").Value?.Trim(),
                "GPU",
                StringComparison.OrdinalIgnoreCase);

            Action<PaddleConfig> device = useGpu ? PaddleDevice.Gpu() : PaddleDevice.Mkldnn();

            return new QueuedPaddleOcrAll(() => new PaddleOcrAll(LocalFullModels.ChineseV3, device)
            {
                Enable180Classification = true,
                AllowRotateDetection = true,
            }, consumerCount: 1);
        });
    }
}

OCR.API项目中修改Program.cs,添加以下行:

`builder.Services.AddOcrLib(builder.Configuration);`

修改后如下:

 
// Startup failures are deliberately left unhandled: a wrapper that only rethrows adds
// nothing, and `throw ex;` would replace the original stack trace.
var builder = WebApplication.CreateBuilder(args);

// Add services to the container.
builder.Services.AddControllers();

// Register the OCR services and the shared PaddleOCR engine.
builder.Services.AddOcrLib(builder.Configuration);

var app = builder.Build();

// Configure the HTTP request pipeline.
app.UseStaticFiles();

app.UseRouting();

app.UseAuthorization();

app.MapControllerRoute(
    name: "default",
    pattern: "Api/{controller=Home}/{action=Index}/{id?}");

app.Run();

appsettings.json文件中添加配置:

`"PaddleDevice": "CPU" // or "GPU"`

向Controllers文件夹下添加控制器OcrController.cs文件

/// <summary>
/// Common base for the API controllers: applies the "Api/[controller]" route
/// template and the API-controller conventions to every derived controller.
/// </summary>
[ApiController]
[Route("Api/[controller]")]
public class BaseApiController : ControllerBase
{
}

/// <summary>
/// HTTP entry point for recognition requests.
/// </summary>
public class OcrController : BaseApiController
{
    /// <summary>Resize target handed to the OCR service for every request.</summary>
    private const int ResizeLine = 1500;

    private readonly IOCRService _ocrService;
    private readonly IRotationService _rotationService;

    public OcrController(IOCRService ocrService,IRotationService rotaionService) 
    {
        _ocrService = ocrService;
        _rotationService = rotaionService;
    }

    /// <summary>
    /// Corrects the rotation of the uploaded picture and runs recognition on it.
    /// Routed as POST "run" beneath the controller's base route.
    /// </summary>
    /// <param name="file">Uploaded picture.</param>
    /// <param name="cancellationToken">Token forwarded to the OCR service.</param>
    /// <returns>The recognition result.</returns>
    [HttpPost]
    [Route("run")]
    public async Task<OCRResult> RunAsync(IFormFile file,CancellationToken cancellationToken)
    {
        using Stream upload = file.OpenReadStream();

        // Rotation check first; the corrected picture is disposed once recognition has finished.
        using MemoryStream corrected = _rotationService.RotationCheck(upload);

        // Exceptions propagate unchanged; a catch block that only rethrows added nothing.
        return await _ocrService.RunAsync(corrected, ResizeLine, cancellationToken);
    }
}

通过Postman上传一张身份证请求API查看返回结果如下:

Dockerfile支持:

#See https://aka.ms/containerfastmode to understand how Visual Studio uses this Dockerfile to build your images for faster debugging.

# Stage "base": the image the final container starts from; working directory /app, port 80 exposed.
# NOTE(review): the image is referenced through the floating :latest tag, so rebuilds may pick up
# a different image — consider pinning a specific tag or digest.
FROM sdflysha/dotnet6-focal-paddle2.2.2:latest AS base
WORKDIR /app
EXPOSE 80

# Stage "build": same image, used to restore and compile the solution under /src.
FROM sdflysha/dotnet6-focal-paddle2.2.2:latest AS build
WORKDIR /src
# Copy only the project files first so the restore layer is reused while they are unchanged.
COPY ["OCR.API/OCR.API.csproj", "OCR.API/"]
COPY ["OCR.Lib/OCR.Lib.csproj", "OCR.Lib/"]
COPY ["OCR.Application/OCR.Application.csproj", "OCR.Application/"]
RUN dotnet restore "OCR.API/OCR.API.csproj"
# Copy the remaining sources and build the API project in Release configuration.
COPY . .
WORKDIR "/src/OCR.API"
RUN dotnet build "OCR.API.csproj" -c Release -o /app/build

# Stage "publish": produces the deployable output in /app/publish (no native app host).
FROM build AS publish
RUN dotnet publish "OCR.API.csproj" -c Release -o /app/publish /p:UseAppHost=false

# Stage "final": the base image plus the published output; starts the API with the dotnet host.
FROM base AS final
WORKDIR /app
COPY --from=publish /app/publish .
ENTRYPOINT ["dotnet", "OCR.API.dll"]