Back to Repositories

Testing BOM Data Handling and Character Encoding in EasyExcel

This test suite validates BOM (Byte Order Mark) handling in CSV file operations using the EasyExcel library. It checks that content is decoded correctly, with and without a BOM, across different character sets when reading and writing CSV files.

Test Coverage Overview

The test suite provides comprehensive coverage of BOM-related functionality in CSV processing:
  • Reading CSV files with and without BOM markers
  • Writing CSV files with different character encodings (UTF-8, GBK, UTF-16BE)
  • Verification of content integrity across various encoding scenarios
  • Handling of both default and explicit charset configurations

Implementation Analysis

The testing approach implements a systematic verification of BOM handling:
  • Uses the JUnit 5 framework with ordered test execution
  • Implements custom ReadListener for data validation
  • Employs both a read-only test on existing files and a combined write-then-read verification
  • Validates header content and data integrity across different character sets

Technical Details

Testing infrastructure and configuration:
  • JUnit Jupiter test framework
  • EasyExcel API for Excel/CSV operations
  • Apache Commons Compress utilities
  • Lombok for logging support
  • Custom test file utility methods
  • Configurable charset and BOM parameters

Best Practices Demonstrated

The test implementation showcases several testing best practices:
  • Systematic test method ordering using @TestMethodOrder
  • Proper test isolation and resource management
  • Comprehensive assertion coverage for data validation
  • Modular test methods with clear separation of concerns
  • Robust error handling and verification

alibaba/easyexcel

easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java

            
package com.alibaba.easyexcel.test.core.bom;

import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;

import com.alibaba.easyexcel.test.util.TestFileUtil;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.metadata.data.ReadCellData;
import com.alibaba.excel.read.listener.ReadListener;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.util.ListUtils;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.compress.utils.Lists;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;

/**
 * Tests BOM (byte order mark) handling when reading and writing CSV files.
 *
 * <p>Two scenarios are covered: reading existing CSV files from the {@code bom} directory, and a
 * write-then-read round trip using several charset names with the BOM flag either left unset
 * or explicitly disabled.
 *
 * @author Jiaju Zhuang
 */
@TestMethodOrder(MethodOrderer.MethodName.class)
@Slf4j
public class BomDataTest {

    /** Directory (relative to the test file root) that holds every file used by this test. */
    private static final String BOM_DIR = "bom" + File.separator;

    /** Number of rows generated by {@link #data()} and expected by the read assertions. */
    private static final int ROW_COUNT = 10;

    /** Age written to every generated row and expected on the first row that is read back. */
    private static final long AGE = 20L;

    /**
     * Reads two existing CSV files, {@code no_bom.csv} and {@code office_bom.csv}, and verifies
     * their content.
     */
    @Test
    public void t01ReadCsv() {
        readCsv(TestFileUtil.readFile(BOM_DIR + "no_bom.csv"));
        readCsv(TestFileUtil.readFile(BOM_DIR + "office_bom.csv"));
    }

    /**
     * Writes and re-reads a CSV file for each charset/BOM combination below.
     *
     * @throws Exception if a target file cannot be opened for writing or a charset name is invalid
     */
    @Test
    public void t02ReadAndWriteCsv() throws Exception {
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_default.csv"), null, null);
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_utf_8.csv"), "UTF-8", null);
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_utf_8_lower_case.csv"), "utf-8", null);
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_gbk.csv"), "GBK", null);
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_gbk_lower_case.csv"), "gbk", null);
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_utf_16be.csv"), "UTF-16BE", null);
        readAndWriteCsv(TestFileUtil.createNewFile(BOM_DIR + "bom_utf_8_not_with_bom.csv"), "UTF-8",
            Boolean.FALSE);
    }

    /**
     * Writes {@link #data()} to {@code file} as CSV and reads it back with the same charset.
     *
     * @param file        target file; it is written first and then read
     * @param charsetName charset name resolved through {@link Charset#forName(String)};
     *                    when {@code null}, a {@code null} charset is passed to both builders
     * @param withBom     value forwarded unchanged to the write builder's {@code withBom};
     *                    may be {@code null}
     * @throws Exception if the file cannot be opened or the charset name cannot be resolved
     */
    private void readAndWriteCsv(File file, String charsetName, Boolean withBom) throws Exception {
        Charset charset = charsetName == null ? null : Charset.forName(charsetName);

        // Close the stream here so it is released even if the write chain throws.
        // Closing a FileOutputStream twice is harmless should the writer close it as well.
        try (FileOutputStream outputStream = new FileOutputStream(file)) {
            EasyExcel.write(outputStream, BomData.class)
                .charset(charset)
                .withBom(withBom)
                .excelType(ExcelTypeEnum.CSV)
                .sheet()
                .doWrite(data());
        }

        EasyExcel.read(file, BomData.class, new BomDataVerifyListener())
            .charset(charset)
            .sheet()
            .doRead();
    }

    /**
     * Reads {@code file} without configuring a charset and verifies its content.
     *
     * @param file CSV file to read
     */
    private void readCsv(File file) {
        EasyExcel.read(file, BomData.class, new BomDataVerifyListener())
            .sheet()
            .doRead();
    }

    /**
     * Builds the rows used by the round-trip test.
     *
     * @return {@link #ROW_COUNT} rows named {@code 姓名0}, {@code 姓名1}, ... each with age {@link #AGE}
     */
    private List<BomData> data() {
        List<BomData> list = ListUtils.newArrayList();
        for (int i = 0; i < ROW_COUNT; i++) {
            BomData data = new BomData();
            data.setName("姓名" + i);
            data.setAge(AGE);
            list.add(data);
        }
        return list;
    }

    /**
     * Listener shared by both read paths. It asserts the first header cell, collects every row,
     * and checks the row count and the first row once reading has finished.
     *
     * <p>A new instance must be used for each read because the collected rows are instance state.
     */
    private static final class BomDataVerifyListener implements ReadListener<BomData> {

        private final List<BomData> dataList = Lists.newArrayList();

        @Override
        public void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) {
            // The first header cell must decode to exactly this text.
            String head = headMap.get(0).getStringValue();
            Assertions.assertEquals("姓名", head);
        }

        @Override
        public void invoke(BomData data, AnalysisContext context) {
            dataList.add(data);
        }

        @Override
        public void doAfterAllAnalysed(AnalysisContext context) {
            // JUnit expects (expected, actual); this order keeps failure messages accurate.
            Assertions.assertEquals(ROW_COUNT, dataList.size());
            BomData first = dataList.get(0);
            Assertions.assertEquals("姓名0", first.getName());
            Assertions.assertEquals(AGE, (long)first.getAge());
        }
    }
}