Back to Repositories

Validating Sophia Language Tokenization in Monaco Editor

This test suite validates the tokenization functionality for the Sophia programming language in Monaco Editor, focusing on syntax highlighting and token classification for language constructs like contracts, records, and comments.

Test Coverage Overview

The test suite provides comprehensive coverage of Sophia language tokenization, including:

Contract declarations and structure
Record type definitions and field declarations
Entry point function syntax
Single-line and multi-line comment parsing
Basic operators and delimiters

Implementation Analysis

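The tests are data-driven: a single testTokenization call receives a list of cases, and each case pairs one source line with the tokens expected from it. Every expected token is described by its startIndex (the offset at which the token begins) and its type (for example keyword.contract.aes or comment.aes); the whitespace between tokens is listed with an empty type, so token boundaries are verified explicitly.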

The implementation leverages Monaco’s token testing utilities to verify correct lexical analysis of Sophia code snippets.

Technical Details

Testing infrastructure includes:

testTokenization utility from Monaco test runner
Token classification system for Sophia language elements
Detailed token metadata including startIndex and type properties
Support for nested token structures and context-aware parsing

Best Practices Demonstrated

The test suite exhibits strong testing practices through:

Systematic coverage of language constructs
Explicit boundary testing for token positions
Comprehensive comment handling verification
Clear test case organization and documentation
Edge case validation for partial and invalid syntax

microsoft/monaco-editor

src/basic-languages/sophia/sophia.test.ts

            
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { testTokenization } from '../test/testRunner';

// Tokenization fixtures passed to testTokenization under the name 'aes'
// (presumably the language id registered for Sophia - confirm against testRunner).
// Each inner array holds one test item: a source `line` and the `tokens` listed
// for it. A token entry gives the `startIndex` at which the token begins and its
// `type`; an empty type is used for the whitespace that separates tokens.
testTokenization('aes', [
	// Contract header: keyword, identifier, then the '=' delimiter.
	[
		{
			line: 'contract HackBG =',
			tokens: [
				{ startIndex: 0, type: 'keyword.contract.aes' },
				{ startIndex: 8, type: '' },
				{ startIndex: 9, type: 'identifier.aes' },
				{ startIndex: 15, type: '' },
				{ startIndex: 16, type: 'delimiter.aes' }
			]
		}
	],

	// Inline record definition. 'state' and 'list' are listed as keywords,
	// while 'developers' and 'developer' are plain identifiers.
	[
		{
			line: 'record state = { developers : list(developer) }',
			tokens: [
				{ startIndex: 0, type: 'keyword.record.aes' },
				{ startIndex: 6, type: '' },
				{ startIndex: 7, type: 'keyword.state.aes' },
				{ startIndex: 12, type: '' },
				{ startIndex: 13, type: 'delimiter.aes' },
				{ startIndex: 14, type: '' },
				{ startIndex: 15, type: 'delimiter.curly.aes' },
				{ startIndex: 16, type: '' },
				{ startIndex: 17, type: 'identifier.aes' },
				{ startIndex: 27, type: '' },
				{ startIndex: 28, type: 'delimiter.aes' },
				{ startIndex: 29, type: '' },
				{ startIndex: 30, type: 'keyword.list.aes' },
				{ startIndex: 34, type: 'delimiter.parenthesis.aes' },
				{ startIndex: 35, type: 'identifier.aes' },
				{ startIndex: 44, type: 'delimiter.parenthesis.aes' },
				{ startIndex: 45, type: '' },
				{ startIndex: 46, type: 'delimiter.curly.aes' }
			]
		}
	],

	// Opening line of a record whose fields follow on later lines.
	[
		{
			line: 'record developer = {',
			tokens: [
				{ startIndex: 0, type: 'keyword.record.aes' },
				{ startIndex: 6, type: '' },
				{ startIndex: 7, type: 'identifier.aes' },
				{ startIndex: 16, type: '' },
				{ startIndex: 17, type: 'delimiter.aes' },
				{ startIndex: 18, type: '' },
				{ startIndex: 19, type: 'delimiter.curly.aes' }
			]
		}
	],

	// Record field with a 'string' type; the trailing comma directly follows
	// the keyword with no whitespace token in between.
	[
		{
			line: 'name : string,',
			tokens: [
				{ startIndex: 0, type: 'identifier.aes' },
				{ startIndex: 4, type: '' },
				{ startIndex: 5, type: 'delimiter.aes' },
				{ startIndex: 6, type: '' },
				{ startIndex: 7, type: 'keyword.string.aes' },
				{ startIndex: 13, type: 'delimiter.aes' }
			]
		}
	],

	// Record field with an 'int' type.
	[
		{
			line: 'experience : int,',
			tokens: [
				{ startIndex: 0, type: 'identifier.aes' },
				{ startIndex: 10, type: '' },
				{ startIndex: 11, type: 'delimiter.aes' },
				{ startIndex: 12, type: '' },
				{ startIndex: 13, type: 'keyword.int.aes' },
				{ startIndex: 16, type: 'delimiter.aes' }
			]
		}
	],

	// Last record field plus the closing brace; here the list element type is
	// the 'string' keyword rather than an identifier.
	[
		{
			line: 'skillset : list(string) }',
			tokens: [
				{ startIndex: 0, type: 'identifier.aes' },
				{ startIndex: 8, type: '' },
				{ startIndex: 9, type: 'delimiter.aes' },
				{ startIndex: 10, type: '' },
				{ startIndex: 11, type: 'keyword.list.aes' },
				{ startIndex: 15, type: 'delimiter.parenthesis.aes' },
				{ startIndex: 16, type: 'keyword.string.aes' },
				{ startIndex: 22, type: 'delimiter.parenthesis.aes' },
				{ startIndex: 23, type: '' },
				{ startIndex: 24, type: 'delimiter.curly.aes' }
			]
		}
	],

	// Entrypoint definition. The adjacent '()' at 15 and the adjacent '{}' at 35
	// are each listed as a single token (the next entry starts two columns later).
	[
		{
			line: 'entrypoint init() = { developers = {} }',
			tokens: [
				{ startIndex: 0, type: 'keyword.entrypoint.aes' },
				{ startIndex: 10, type: '' },
				{ startIndex: 11, type: 'identifier.aes' },
				{ startIndex: 15, type: 'delimiter.parenthesis.aes' },
				{ startIndex: 17, type: '' },
				{ startIndex: 18, type: 'delimiter.aes' },
				{ startIndex: 19, type: '' },
				{ startIndex: 20, type: 'delimiter.curly.aes' },
				{ startIndex: 21, type: '' },
				{ startIndex: 22, type: 'identifier.aes' },
				{ startIndex: 32, type: '' },
				{ startIndex: 33, type: 'delimiter.aes' },
				{ startIndex: 34, type: '' },
				{ startIndex: 35, type: 'delimiter.curly.aes' },
				{ startIndex: 37, type: '' },
				{ startIndex: 38, type: 'delimiter.curly.aes' }
			]
		}
	],

	// Comments - single line
	// Bare comment marker with no text after it.
	[
		{
			line: '//',
			tokens: [{ startIndex: 0, type: 'comment.aes' }]
		}
	],

	// Leading indentation stays an untyped token; the comment starts at column 4.
	[
		{
			line: '    // a comment',
			tokens: [
				{ startIndex: 0, type: '' },
				{ startIndex: 4, type: 'comment.aes' }
			]
		}
	],

	// Comment starting at column 0 covers the whole line.
	[
		{
			line: '// a comment',
			tokens: [{ startIndex: 0, type: 'comment.aes' }]
		}
	],

	// No space is required between the comment marker and the text.
	[
		{
			line: '//sticky comment',
			tokens: [{ startIndex: 0, type: 'comment.aes' }]
		}
	],

	// A single slash is not a comment marker: it is listed as a delimiter and
	// the following words as identifiers.
	[
		{
			line: '/almost a comment',
			tokens: [
				{ startIndex: 0, type: 'delimiter.aes' },
				{ startIndex: 1, type: 'identifier.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'identifier.aes' },
				{ startIndex: 9, type: '' },
				{ startIndex: 10, type: 'identifier.aes' }
			]
		}
	],

	// A double slash inside a range comment does not swallow the rest of the
	// line: the comment ends before column 7 and 'a' is an identifier again.
	[
		{
			line: '/* //*/ a',
			tokens: [
				{ startIndex: 0, type: 'comment.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'identifier.aes' }
			]
		}
	],

	// Division followed by a range comment that is not closed on this line;
	// the comment token runs from column 6 to the end of the line.
	[
		{
			line: '1 / 2 /* comment',
			tokens: [
				{ startIndex: 0, type: 'number.aes' },
				{ startIndex: 1, type: '' },
				{ startIndex: 2, type: 'delimiter.aes' },
				{ startIndex: 3, type: '' },
				{ startIndex: 4, type: 'number.aes' },
				{ startIndex: 5, type: '' },
				{ startIndex: 6, type: 'comment.aes' }
			]
		}
	],

	// Trailing line comment after code; the second double slash is part of the
	// same comment token that starts at column 16.
	[
		{
			line: 'let x : int = 1 // my comment // is a nice one',
			tokens: [
				{ startIndex: 0, type: 'keyword.let.aes' },
				{ startIndex: 3, type: '' },
				{ startIndex: 4, type: 'identifier.aes' },
				{ startIndex: 5, type: '' },
				{ startIndex: 6, type: 'delimiter.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'keyword.int.aes' },
				{ startIndex: 11, type: '' },
				{ startIndex: 12, type: 'delimiter.aes' },
				{ startIndex: 13, type: '' },
				{ startIndex: 14, type: 'number.aes' },
				{ startIndex: 15, type: '' },
				{ startIndex: 16, type: 'comment.aes' }
			]
		}
	],

	// Comments - range comment, single line
	// Range comment opened and closed on the same line.
	[
		{
			line: '/* a simple comment */',
			tokens: [{ startIndex: 0, type: 'comment.aes' }]
		}
	],

	// Range comment embedded in an expression: it occupies columns 14-35 and
	// normal tokenization resumes with the number at column 37.
	[
		{
			line: 'let x : int = /* a simple comment */ 1',
			tokens: [
				{ startIndex: 0, type: 'keyword.let.aes' },
				{ startIndex: 3, type: '' },
				{ startIndex: 4, type: 'identifier.aes' },
				{ startIndex: 5, type: '' },
				{ startIndex: 6, type: 'delimiter.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'keyword.int.aes' },
				{ startIndex: 11, type: '' },
				{ startIndex: 12, type: 'delimiter.aes' },
				{ startIndex: 13, type: '' },
				{ startIndex: 14, type: 'comment.aes' },
				{ startIndex: 36, type: '' },
				{ startIndex: 37, type: 'number.aes' }
			]
		}
	],

	// Stray comment terminator after an already closed comment: the space at
	// column 23 and the terminator after it are listed as one untyped token.
	[
		{
			line: 'let x = /* comment */ 1 */',
			tokens: [
				{ startIndex: 0, type: 'keyword.let.aes' },
				{ startIndex: 3, type: '' },
				{ startIndex: 4, type: 'identifier.aes' },
				{ startIndex: 5, type: '' },
				{ startIndex: 6, type: 'delimiter.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'comment.aes' },
				{ startIndex: 21, type: '' },
				{ startIndex: 22, type: 'number.aes' },
				{ startIndex: 23, type: '' }
			]
		}
	],

	// Empty range comment (opener immediately followed by terminator).
	[
		{
			line: 'let x = /**/',
			tokens: [
				{ startIndex: 0, type: 'keyword.let.aes' },
				{ startIndex: 3, type: '' },
				{ startIndex: 4, type: 'identifier.aes' },
				{ startIndex: 5, type: '' },
				{ startIndex: 6, type: 'delimiter.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'comment.aes' }
			]
		}
	],

	// Opener followed by a lone slash: everything from column 8 onward is
	// listed as a single comment token.
	[
		{
			line: 'let x = /*/',
			tokens: [
				{ startIndex: 0, type: 'keyword.let.aes' },
				{ startIndex: 3, type: '' },
				{ startIndex: 4, type: 'identifier.aes' },
				{ startIndex: 5, type: '' },
				{ startIndex: 6, type: 'delimiter.aes' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'comment.aes' }
			]
		}
	]
]);