Validating Formula Recognition Components in PaddleOCR
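This test suite validates the UniMERNet formula-recognition model implementation in PaddleOCR, covering both the Donut-Swin backbone and the UniMERNet head. It checks that the backbone turns a formula-image-sized input tensor into encoder features of the expected shape, and that the head, given encoder features, generates an output of the expected shape.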
Test Coverage Overview
Implementation Analysis
Technical Details
Best Practices Demonstrated
paddlepaddle/paddleocr
tests/test_formula_model.py
import sys
import os
from pathlib import Path
from typing import Any
import paddle
import pytest
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(current_dir, "..")))
from ppocr.modeling.backbones.rec_donut_swin import DonutSwinModel, DonutSwinModelOutput
from ppocr.modeling.heads.rec_unimernet_head import UniMERNetHead
@pytest.fixture
def sample_image():
    """
    Provide a random input tensor for the backbone test.
    Returns:
        A tensor of shape [1, 1, 192, 672] drawn from ``paddle.randn``.
    """
    # Presumably (batch, channels, height, width) -- confirm against the model.
    image_shape = [1, 1, 192, 672]
    return paddle.randn(image_shape)
@pytest.fixture
def encoder_feat():
    """
    Provide a random encoder output for the head test.
    Returns:
        A DonutSwinModelOutput whose ``last_hidden_state`` is a random
        tensor of shape [1, 126, 1024].
    """
    hidden_states = paddle.randn([1, 126, 1024])
    return DonutSwinModelOutput(last_hidden_state=hidden_states)
def test_unimernet_backbone(sample_image):
    """
    Check the output shape of the UniMERNet (Donut-Swin) backbone.
    Args:
        sample_image: input tensor supplied by the ``sample_image`` fixture.
    """
    backbone_config = {
        "hidden_size": 1024,
        "num_layers": 4,
        "num_heads": [4, 8, 16, 32],
        "add_pooling_layer": True,
        "use_mask_token": False,
    }
    expected_shape = [1, 126, 1024]

    model = DonutSwinModel(**backbone_config)
    model.eval()

    # Inference only: no gradients are needed for a shape check.
    with paddle.no_grad():
        outputs = model(sample_image)

    # The first element of the backbone output is the tensor under test.
    features = outputs[0]
    assert features.shape == expected_shape
def test_unimernet_head(encoder_feat):
    """
    Check the output shape of the UniMERNet head.
    Args:
        encoder_feat: encoder output supplied by the ``encoder_feat`` fixture.
    """
    head_config = {
        "max_new_tokens": 5,
        "decoder_start_token_id": 0,
        "temperature": 0.2,
        "do_sample": False,
        "top_p": 0.95,
        "encoder_hidden_size": 1024,
        "is_export": False,
        "length_aware": True,
    }
    # Presumably the start token plus the 5 new tokens -- confirm in the head.
    expected_shape = [1, 6]

    decoder_head = UniMERNetHead(**head_config)
    decoder_head.eval()

    # Inference only: no gradients are needed for a shape check.
    with paddle.no_grad():
        generated = decoder_head(encoder_feat)

    assert generated.shape == expected_shape