class ref JsonTokenParser
"""
Streaming JSON token parser.
Parses a JSON string and emits tokens to a JsonTokenNotify callback.
This is the lower-level API — most users should use JsonParser.parse()
instead. Use this when you need to process large documents without
materializing the full tree.
"""
var _notify: JsonTokenNotify
var _source: String box = ""
var _offset: USize = 0
var _token_start: USize = 0
var _line: USize = 1
var _abort: Bool = false
var last_number: (I64 | F64) = I64(0)
"""The most recently parsed number value."""
var last_string: String = ""
"""The most recently parsed string or key value."""
new ref create(notify': JsonTokenNotify) =>
_notify = notify'
fun ref parse(source': String box) ? =>
"""Parse a JSON document, emitting tokens to the notify callback."""
_source = source'
_offset = 0
_token_start = 0
_line = 1
_abort = false
last_number = I64(0)
last_string = ""
_skip_whitespace()
if _offset >= _source.size() then return end
_parse_value()?
_skip_whitespace()
if _offset < _source.size() then error end
fun ref abort() =>
"""Signal the parser to stop after the current token."""
_abort = true
fun token_start(): USize =>
"""Byte offset where the current token starts."""
_token_start
fun token_end(): USize =>
"""Byte offset where the current token ends."""
_offset
fun line(): USize =>
"""Current line number (1-based)."""
_line
fun describe_error(): String =>
"""Human-readable description of the error location."""
if _offset < _source.size() then
"Invalid JSON at byte offset " + _offset.string()
+ ", line " + _line.string()
else
"Unexpected end of JSON at byte offset "
+ _source.size().string()
+ ", line " + _line.string()
end
// --- Private parsing methods ---
fun ref _emit(token: JsonToken) ? =>
_notify(this, token)
if _abort then error end
fun ref _parse_value() ? =>
_skip_whitespace()
_token_start = _offset
match _peek()?
| '{' => _parse_object()?
| '[' => _parse_array()?
| '"' => _parse_string(false)?
| 't' => _parse_true()?
| 'f' => _parse_false()?
| 'n' => _parse_null()?
| let c: U8 if (c == '-') or ((c >= '0') and (c <= '9')) =>
_parse_number()?
else
error
end
fun ref _parse_object() ? =>
_next()? // consume '{'
_emit(JsonTokenObjectStart)?
_skip_whitespace()
if _peek()? == '}' then
_next()?
_emit(JsonTokenObjectEnd)?
return
end
while true do
_skip_whitespace()
_token_start = _offset
_parse_string(true)? // parse key
_skip_whitespace()
_eat(':')?
_parse_value()? // parse value
_skip_whitespace()
_token_start = _offset
match _next()?
| ',' => None
| '}' =>
_emit(JsonTokenObjectEnd)?
return
else error
end
end
fun ref _parse_array() ? =>
_next()? // consume '['
_emit(JsonTokenArrayStart)?
_skip_whitespace()
if _peek()? == ']' then
_next()?
_emit(JsonTokenArrayEnd)?
return
end
while true do
_parse_value()?
_skip_whitespace()
_token_start = _offset
match _next()?
| ',' => None
| ']' =>
_emit(JsonTokenArrayEnd)?
return
else error
end
end
fun ref _parse_true() ? =>
_eat('t')?; _eat('r')?; _eat('u')?; _eat('e')?
_emit(JsonTokenTrue)?
fun ref _parse_false() ? =>
_eat('f')?; _eat('a')?; _eat('l')?; _eat('s')?; _eat('e')?
_emit(JsonTokenFalse)?
fun ref _parse_null() ? =>
_eat('n')?; _eat('u')?; _eat('l')?; _eat('l')?
_emit(JsonTokenNull)?
fun ref _parse_number() ? =>
let sign: I64 = if _peek_safe() == '-' then _next()?; -1 else 1 end
let int_start = _offset
let integer = _read_digits()?
let int_digits = _offset - int_start
// RFC 8259: leading zeros not allowed (e.g., 01, 00, 007)
if try (_source(int_start)? == '0') and (int_digits > 1)
else false end then
error
end
// For large integers, re-read as F64 to get the correct value
// (_read_digits accumulates into I64 which silently wraps on overflow)
let force_float = int_digits > 18
let integer_f64: F64 = if force_float then
_offset = int_start
_read_digits_f64()?
else
integer.f64()
end
var has_dot = false
var frac: F64 = 0
if _peek_safe() == '.' then
_next()?
has_dot = true
frac = _read_fractional()?
end
var has_exp = false
var exp: I64 = 0
match _peek_safe()
| 'e' | 'E' =>
_next()?
has_exp = true
let exp_sign: I64 = match _peek()?
| '+' => _next()?; 1
| '-' => _next()?; -1
else 1
end
exp = _read_digits()? * exp_sign
end
if has_dot or has_exp or force_float then
last_number = sign.f64() * (integer_f64 + frac)
* F64(10).pow(exp.f64())
else
last_number = sign * integer
end
_emit(JsonTokenNumber)?
fun ref _read_digits(): I64 ? =>
var result: I64 = 0
var count: USize = 0
while _offset < _source.size() do
let c = _source(_offset)?
if (c >= '0') and (c <= '9') then
result = (result * 10) + (c - '0').i64()
_offset = _offset + 1
count = count + 1
else
break
end
end
if count == 0 then error end
result
fun ref _read_digits_f64(): F64 ? =>
var result: F64 = 0
var count: USize = 0
while _offset < _source.size() do
let c = _source(_offset)?
if (c >= '0') and (c <= '9') then
result = (result * 10) + (c - '0').f64()
_offset = _offset + 1
count = count + 1
else
break
end
end
if count == 0 then error end
result
fun ref _read_fractional(): F64 ? =>
var result: F64 = 0
var divisor: F64 = 10
var count: USize = 0
while _offset < _source.size() do
let c = _source(_offset)?
if (c >= '0') and (c <= '9') then
result = result + ((c - '0').f64() / divisor)
divisor = divisor * 10
_offset = _offset + 1
count = count + 1
else
break
end
end
if count == 0 then error end
result
fun ref _parse_string(is_key: Bool) ? =>
_eat('"')?
_token_start = _offset
var buf = recover String end
while true do
match _next()?
| '"' => break
| '\\' =>
match _next()?
| '"' => buf.push('"')
| '\\' => buf.push('\\')
| '/' => buf.push('/')
| 'b' => buf.push(0x08)
| 'f' => buf.push(0x0C)
| 'n' => buf.push('\n')
| 'r' => buf.push('\r')
| 't' => buf.push('\t')
| 'u' => buf = _parse_unicode(consume buf)?
else error
end
| let c: U8 if c < 0x20 => error
| let c: U8 => buf.push(c)
end
end
last_string = consume buf
_emit(if is_key then JsonTokenKey else JsonTokenString end)?
fun ref _parse_unicode(buf: String iso): String iso^ ? =>
let value = _read_hex4()?
if (value >= 0xD800) and (value < 0xDC00) then
// High surrogate — expect \uXXXX low surrogate
_eat('\\')?; _eat('u')?
let low = _read_hex4()?
if (low >= 0xDC00) and (low < 0xE000) then
let combined =
0x10000 + (((value and 0x3FF) << 10) or (low and 0x3FF))
buf.append(recover val String.from_utf32(combined) end)
else
error
end
elseif (value >= 0xDC00) and (value < 0xE000) then
error // lone low surrogate
else
buf.append(recover val String.from_utf32(value) end)
end
consume buf
fun ref _read_hex4(): U32 ? =>
var result: U32 = 0
var i: USize = 0
while i < 4 do
let c = _next()?
let digit: U32 = if (c >= '0') and (c <= '9') then
(c - '0').u32()
elseif (c >= 'a') and (c <= 'f') then
(c - 'a').u32() + 10
elseif (c >= 'A') and (c <= 'F') then
(c - 'A').u32() + 10
else
error
end
result = (result << 4) or digit
i = i + 1
end
result
// --- Character primitives ---
fun _peek(): U8 ? =>
_source(_offset)?
fun _peek_safe(): U8 =>
try _source(_offset)? else 0 end
fun ref _next(): U8 ? =>
let c = _source(_offset)?
_offset = _offset + 1
if c == '\n' then _line = _line + 1 end
c
fun ref _eat(expected: U8) ? =>
if _source(_offset)? != expected then error end
_offset = _offset + 1
if expected == '\n' then _line = _line + 1 end
fun ref _skip_whitespace() =>
while _offset < _source.size() do
match try _source(_offset)? else return end
| ' ' | '\t' | '\r' => _offset = _offset + 1
| '\n' => _offset = _offset + 1; _line = _line + 1
else return
end
end