File: //home/ubuntu/neovim/.deps/build/src/treesitter_markdown/tree-sitter-markdown/src/scanner.c
#include "tree_sitter/parser.h"
#include <assert.h>
#include <ctype.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
// For explanation of the tokens see grammar.js
typedef enum {
LINE_ENDING,
SOFT_LINE_ENDING,
BLOCK_CLOSE,
BLOCK_CONTINUATION,
BLOCK_QUOTE_START,
INDENTED_CHUNK_START,
ATX_H1_MARKER,
ATX_H2_MARKER,
ATX_H3_MARKER,
ATX_H4_MARKER,
ATX_H5_MARKER,
ATX_H6_MARKER,
SETEXT_H1_UNDERLINE,
SETEXT_H2_UNDERLINE,
THEMATIC_BREAK,
LIST_MARKER_MINUS,
LIST_MARKER_PLUS,
LIST_MARKER_STAR,
LIST_MARKER_PARENTHESIS,
LIST_MARKER_DOT,
LIST_MARKER_MINUS_DONT_INTERRUPT,
LIST_MARKER_PLUS_DONT_INTERRUPT,
LIST_MARKER_STAR_DONT_INTERRUPT,
LIST_MARKER_PARENTHESIS_DONT_INTERRUPT,
LIST_MARKER_DOT_DONT_INTERRUPT,
FENCED_CODE_BLOCK_START_BACKTICK,
FENCED_CODE_BLOCK_START_TILDE,
BLANK_LINE_START,
FENCED_CODE_BLOCK_END_BACKTICK,
FENCED_CODE_BLOCK_END_TILDE,
HTML_BLOCK_1_START,
HTML_BLOCK_1_END,
HTML_BLOCK_2_START,
HTML_BLOCK_3_START,
HTML_BLOCK_4_START,
HTML_BLOCK_5_START,
HTML_BLOCK_6_START,
HTML_BLOCK_7_START,
CLOSE_BLOCK,
NO_INDENTED_CHUNK,
ERROR,
TRIGGER_ERROR,
TOKEN_EOF,
MINUS_METADATA,
PLUS_METADATA,
PIPE_TABLE_START,
PIPE_TABLE_LINE_ENDING,
} TokenType;
// Description of a block on the block stack.
//
// LIST_ITEM is a list item with minimal indentation (content begins at indent
// level 2) while LIST_ITEM_MAX_INDENTATION represents a list item with maximal
// indentation without being considered a indented code block.
//
// ANONYMOUS represents any block that whose close is not handled by the
// external s.
typedef enum {
BLOCK_QUOTE,
INDENTED_CODE_BLOCK,
LIST_ITEM,
LIST_ITEM_1_INDENTATION,
LIST_ITEM_2_INDENTATION,
LIST_ITEM_3_INDENTATION,
LIST_ITEM_4_INDENTATION,
LIST_ITEM_5_INDENTATION,
LIST_ITEM_6_INDENTATION,
LIST_ITEM_7_INDENTATION,
LIST_ITEM_8_INDENTATION,
LIST_ITEM_9_INDENTATION,
LIST_ITEM_10_INDENTATION,
LIST_ITEM_11_INDENTATION,
LIST_ITEM_12_INDENTATION,
LIST_ITEM_13_INDENTATION,
LIST_ITEM_14_INDENTATION,
LIST_ITEM_MAX_INDENTATION,
FENCED_CODE_BLOCK,
ANONYMOUS,
} Block;
// Determines if a character is punctuation as defined by the markdown spec.
static bool is_punctuation(char chr) {
return (chr >= '!' && chr <= '/') || (chr >= ':' && chr <= '@') ||
(chr >= '[' && chr <= '`') || (chr >= '{' && chr <= '~');
}
// Returns the indentation level which lines of a list item should have at
// minimum. Should only be called with blocks for which `is_list_item` returns
// true.
static uint8_t list_item_indentation(Block block) {
return (uint8_t)(block - LIST_ITEM + 2);
}
#define NUM_HTML_TAG_NAMES_RULE_1 3
static const char *const HTML_TAG_NAMES_RULE_1[NUM_HTML_TAG_NAMES_RULE_1] = {
"pre", "script", "style"};
#define NUM_HTML_TAG_NAMES_RULE_7 62
static const char *const HTML_TAG_NAMES_RULE_7[NUM_HTML_TAG_NAMES_RULE_7] = {
"address", "article", "aside", "base", "basefont", "blockquote",
"body", "caption", "center", "col", "colgroup", "dd",
"details", "dialog", "dir", "div", "dl", "dt",
"fieldset", "figcaption", "figure", "footer", "form", "frame",
"frameset", "h1", "h2", "h3", "h4", "h5",
"h6", "head", "header", "hr", "html", "iframe",
"legend", "li", "link", "main", "menu", "menuitem",
"nav", "noframes", "ol", "optgroup", "option", "p",
"param", "section", "source", "summary", "table", "tbody",
"td", "tfoot", "th", "thead", "title", "tr",
"track", "ul"};
// For explanation of the tokens see grammar.js
static const bool paragraph_interrupt_symbols[] = {
false, // LINE_ENDING,
false, // SOFT_LINE_ENDING,
false, // BLOCK_CLOSE,
false, // BLOCK_CONTINUATION,
true, // BLOCK_QUOTE_START,
false, // INDENTED_CHUNK_START,
true, // ATX_H1_MARKER,
true, // ATX_H2_MARKER,
true, // ATX_H3_MARKER,
true, // ATX_H4_MARKER,
true, // ATX_H5_MARKER,
true, // ATX_H6_MARKER,
true, // SETEXT_H1_UNDERLINE,
true, // SETEXT_H2_UNDERLINE,
true, // THEMATIC_BREAK,
true, // LIST_MARKER_MINUS,
true, // LIST_MARKER_PLUS,
true, // LIST_MARKER_STAR,
true, // LIST_MARKER_PARENTHESIS,
true, // LIST_MARKER_DOT,
false, // LIST_MARKER_MINUS_DONT_INTERRUPT,
false, // LIST_MARKER_PLUS_DONT_INTERRUPT,
false, // LIST_MARKER_STAR_DONT_INTERRUPT,
false, // LIST_MARKER_PARENTHESIS_DONT_INTERRUPT,
false, // LIST_MARKER_DOT_DONT_INTERRUPT,
true, // FENCED_CODE_BLOCK_START_BACKTICK,
true, // FENCED_CODE_BLOCK_START_TILDE,
true, // BLANK_LINE_START,
false, // FENCED_CODE_BLOCK_END_BACKTICK,
false, // FENCED_CODE_BLOCK_END_TILDE,
true, // HTML_BLOCK_1_START,
false, // HTML_BLOCK_1_END,
true, // HTML_BLOCK_2_START,
true, // HTML_BLOCK_3_START,
true, // HTML_BLOCK_4_START,
true, // HTML_BLOCK_5_START,
true, // HTML_BLOCK_6_START,
false, // HTML_BLOCK_7_START,
false, // CLOSE_BLOCK,
false, // NO_INDENTED_CHUNK,
false, // ERROR,
false, // TRIGGER_ERROR,
false, // EOF,
false, // MINUS_METADATA,
false, // PLUS_METADATA,
true, // PIPE_TABLE_START,
false, // PIPE_TABLE_LINE_ENDING,
};
// State bitflags used with `Scanner.state`
// Currently matching (at the beginning of a line)
static const uint8_t STATE_MATCHING = 0x1 << 0;
// Last line break was inside a paragraph
static const uint8_t STATE_WAS_SOFT_LINE_BREAK = 0x1 << 1;
// Block should be closed after next line break
static const uint8_t STATE_CLOSE_BLOCK = 0x1 << 4;
static size_t roundup_32(size_t x) {
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return x;
}
typedef struct {
// A stack of open blocks in the current parse state
struct {
size_t size;
size_t capacity;
Block *items;
} open_blocks;
// Parser state flags
uint8_t state;
// Number of blocks that have been matched so far. Only changes during
// matching and is reset after every line ending
uint8_t matched;
// Consumed but "unused" indentation. Sometimes a tab needs to be "split" to
// be used in multiple tokens.
uint8_t indentation;
// The current column. Used to decide how many spaces a tab should equal
uint8_t column;
// The delimiter length of the currently open fenced code block
uint8_t fenced_code_block_delimiter_length;
bool simulate;
} Scanner;
static void push_block(Scanner *s, Block b) {
if (s->open_blocks.size == s->open_blocks.capacity) {
s->open_blocks.capacity =
s->open_blocks.capacity ? s->open_blocks.capacity << 1 : 8;
void *tmp = realloc(s->open_blocks.items,
sizeof(Block) * s->open_blocks.capacity);
assert(tmp != NULL);
s->open_blocks.items = tmp;
}
s->open_blocks.items[s->open_blocks.size++] = b;
}
static inline Block pop_block(Scanner *s) {
return s->open_blocks.items[--s->open_blocks.size];
}
// Write the whole state of a Scanner to a byte buffer
static unsigned serialize(Scanner *s, char *buffer) {
unsigned size = 0;
buffer[size++] = (char)s->state;
buffer[size++] = (char)s->matched;
buffer[size++] = (char)s->indentation;
buffer[size++] = (char)s->column;
buffer[size++] = (char)s->fenced_code_block_delimiter_length;
size_t blocks_count = s->open_blocks.size;
if (blocks_count > 0) {
memcpy(&buffer[size], s->open_blocks.items,
blocks_count * sizeof(Block));
size += blocks_count * sizeof(Block);
}
return size;
}
// Read the whole state of a Scanner from a byte buffer
// `serizalize` and `deserialize` should be fully symmetric.
static void deserialize(Scanner *s, const char *buffer, unsigned length) {
s->open_blocks.size = 0;
s->open_blocks.capacity = 0;
s->state = 0;
s->matched = 0;
s->indentation = 0;
s->column = 0;
s->fenced_code_block_delimiter_length = 0;
if (length > 0) {
size_t size = 0;
s->state = (uint8_t)buffer[size++];
s->matched = (uint8_t)buffer[size++];
s->indentation = (uint8_t)buffer[size++];
s->column = (uint8_t)buffer[size++];
s->fenced_code_block_delimiter_length = (uint8_t)buffer[size++];
size_t blocks_size = length - size;
if (blocks_size > 0) {
size_t blocks_count = blocks_size / sizeof(Block);
// ensure open blocks has enough room
if (s->open_blocks.capacity < blocks_count) {
size_t capacity = roundup_32(blocks_count);
void *tmp = realloc(s->open_blocks.items,
sizeof(Block) * capacity);
assert(tmp != NULL);
s->open_blocks.items = tmp;
s->open_blocks.capacity = capacity;
}
memcpy(s->open_blocks.items, &buffer[size], blocks_size);
s->open_blocks.size = blocks_count;
}
}
}
static void mark_end(Scanner *s, TSLexer *lexer) {
if (!s->simulate) {
lexer->mark_end(lexer);
}
}
// Convenience function to emit the error token. This is done to stop invalid
// parse branches. Specifically:
// 1. When encountering a newline after a line break that ended a paragraph, and
// no new block
// has been opened.
// 2. When encountering a new block after a soft line break.
// 3. When a `$._trigger_error` token is valid, which is used to stop parse
// branches through
// normal tree-sitter grammar rules.
//
// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
// grammar.js
static bool error(TSLexer *lexer) {
lexer->result_symbol = ERROR;
return true;
}
// Advance the lexer one character
// Also keeps track of the current column, counting tabs as spaces with tab stop
// 4 See https://github.github.com/gfm/#tabs
static size_t advance(Scanner *s, TSLexer *lexer) {
size_t size = 1;
if (lexer->lookahead == '\t') {
size = 4 - s->column;
s->column = 0;
} else {
s->column = (s->column + 1) % 4;
}
lexer->advance(lexer, false);
return size;
}
// Try to match the given block, i.e. consume all tokens that belong to the
// block. These are
// 1. indentation for list items and indented code blocks
// 2. '>' for block quotes
// Returns true if the block is matched and false otherwise
static bool match(Scanner *s, TSLexer *lexer, Block block) {
switch (block) {
case INDENTED_CODE_BLOCK:
while (s->indentation < 4) {
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer);
} else {
break;
}
}
if (s->indentation >= 4 && lexer->lookahead != '\n' &&
lexer->lookahead != '\r') {
s->indentation -= 4;
return true;
}
break;
case LIST_ITEM:
case LIST_ITEM_1_INDENTATION:
case LIST_ITEM_2_INDENTATION:
case LIST_ITEM_3_INDENTATION:
case LIST_ITEM_4_INDENTATION:
case LIST_ITEM_5_INDENTATION:
case LIST_ITEM_6_INDENTATION:
case LIST_ITEM_7_INDENTATION:
case LIST_ITEM_8_INDENTATION:
case LIST_ITEM_9_INDENTATION:
case LIST_ITEM_10_INDENTATION:
case LIST_ITEM_11_INDENTATION:
case LIST_ITEM_12_INDENTATION:
case LIST_ITEM_13_INDENTATION:
case LIST_ITEM_14_INDENTATION:
case LIST_ITEM_MAX_INDENTATION:
while (s->indentation < list_item_indentation(block)) {
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer);
} else {
break;
}
}
if (s->indentation >= list_item_indentation(block)) {
s->indentation -= list_item_indentation(block);
return true;
}
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
s->indentation = 0;
return true;
}
break;
case BLOCK_QUOTE:
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer);
}
if (lexer->lookahead == '>') {
advance(s, lexer);
s->indentation = 0;
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer) - 1;
}
return true;
}
break;
case FENCED_CODE_BLOCK:
case ANONYMOUS:
return true;
}
return false;
}
static bool parse_fenced_code_block(Scanner *s, const char delimiter,
TSLexer *lexer, const bool *valid_symbols) {
// count the number of backticks
uint8_t level = 0;
while (lexer->lookahead == delimiter) {
advance(s, lexer);
level++;
}
mark_end(s, lexer);
// If this is able to close a fenced code block then that is the only valid
// interpretation. It can only close a fenced code block if the number of
// backticks is at least the number of backticks of the opening delimiter.
// Also it cannot be indented more than 3 spaces.
if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_END_BACKTICK]
: valid_symbols[FENCED_CODE_BLOCK_END_TILDE]) &&
s->indentation < 4 && level >= s->fenced_code_block_delimiter_length &&
(lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
s->fenced_code_block_delimiter_length = 0;
lexer->result_symbol = delimiter == '`' ? FENCED_CODE_BLOCK_END_BACKTICK
: FENCED_CODE_BLOCK_END_TILDE;
return true;
}
// If this could be the start of a fenced code block, check if the info
// string contains any backticks.
if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_START_BACKTICK]
: valid_symbols[FENCED_CODE_BLOCK_START_TILDE]) &&
level >= 3) {
bool info_string_has_backtick = false;
if (delimiter == '`') {
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
!lexer->eof(lexer)) {
if (lexer->lookahead == '`') {
info_string_has_backtick = true;
break;
}
advance(s, lexer);
}
}
// If it does not then choose to interpret this as the start of a fenced
// code block.
if (!info_string_has_backtick) {
lexer->result_symbol = delimiter == '`'
? FENCED_CODE_BLOCK_START_BACKTICK
: FENCED_CODE_BLOCK_START_TILDE;
if (!s->simulate)
push_block(s, FENCED_CODE_BLOCK);
// Remember the length of the delimiter for later, since we need it
// to decide whether a sequence of backticks can close the block.
s->fenced_code_block_delimiter_length = level;
s->indentation = 0;
return true;
}
}
return false;
}
static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
advance(s, lexer);
mark_end(s, lexer);
// Otherwise count the number of stars permitting whitespaces between them.
size_t star_count = 1;
// Also remember how many stars there are before the first whitespace...
// ...and how many spaces follow the first star.
uint8_t extra_indentation = 0;
for (;;) {
if (lexer->lookahead == '*') {
if (star_count == 1 && extra_indentation >= 1 &&
valid_symbols[LIST_MARKER_STAR]) {
// If we get to this point then the token has to be at least
// this long. We need to call `mark_end` here in case we decide
// later that this is a list item.
mark_end(s, lexer);
}
star_count++;
advance(s, lexer);
} else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (star_count == 1) {
extra_indentation += advance(s, lexer);
} else {
advance(s, lexer);
}
} else {
break;
}
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
bool dont_interrupt = false;
if (star_count == 1 && line_end) {
extra_indentation = 1;
// line is empty so don't interrupt paragraphs if this is a list marker
dont_interrupt = s->matched == s->open_blocks.size;
}
// If there were at least 3 stars then this could be a thematic break
bool thematic_break = star_count >= 3 && line_end;
// If there was a star and at least one space after that star then this
// could be a list marker.
bool list_marker_star = star_count >= 1 && extra_indentation >= 1;
if (valid_symbols[THEMATIC_BREAK] && thematic_break && s->indentation < 4) {
// If a thematic break is valid then it takes precedence
lexer->result_symbol = THEMATIC_BREAK;
mark_end(s, lexer);
s->indentation = 0;
return true;
}
if ((dont_interrupt ? valid_symbols[LIST_MARKER_STAR_DONT_INTERRUPT]
: valid_symbols[LIST_MARKER_STAR]) &&
list_marker_star) {
// List markers take precedence over emphasis markers
// If star_count > 1 then we already called mark_end at the right point.
// Otherwise the token should go until this point.
if (star_count == 1) {
mark_end(s, lexer);
}
// Not counting one space...
extra_indentation--;
// ... check if the list item begins with an indented code block
if (extra_indentation <= 3) {
// If not then calculate the indentation level of the list item
// content as indentation of list marker + indentation after list
// marker - 1
extra_indentation += s->indentation;
s->indentation = 0;
} else {
// Otherwise the indentation level is just the indentation of the
// list marker. We keep the indentation after the list marker for
// later blocks.
uint8_t temp = s->indentation;
s->indentation = extra_indentation;
extra_indentation = temp;
}
if (!s->simulate)
push_block(s, (Block)(LIST_ITEM + extra_indentation));
lexer->result_symbol =
dont_interrupt ? LIST_MARKER_STAR_DONT_INTERRUPT : LIST_MARKER_STAR;
return true;
}
return false;
}
static bool parse_thematic_break_underscore(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
advance(s, lexer);
mark_end(s, lexer);
size_t underscore_count = 1;
for (;;) {
if (lexer->lookahead == '_') {
underscore_count++;
advance(s, lexer);
} else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
} else {
break;
}
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
if (underscore_count >= 3 && line_end && valid_symbols[THEMATIC_BREAK]) {
lexer->result_symbol = THEMATIC_BREAK;
mark_end(s, lexer);
s->indentation = 0;
return true;
}
return false;
}
static bool parse_block_quote(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
if (valid_symbols[BLOCK_QUOTE_START]) {
advance(s, lexer);
s->indentation = 0;
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer) - 1;
}
lexer->result_symbol = BLOCK_QUOTE_START;
if (!s->simulate)
push_block(s, BLOCK_QUOTE);
return true;
}
return false;
}
static bool parse_atx_heading(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
if (valid_symbols[ATX_H1_MARKER] && s->indentation <= 3) {
mark_end(s, lexer);
uint16_t level = 0;
while (lexer->lookahead == '#' && level <= 6) {
advance(s, lexer);
level++;
}
if (level <= 6 &&
(lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
lexer->result_symbol = ATX_H1_MARKER + (level - 1);
s->indentation = 0;
mark_end(s, lexer);
return true;
}
}
return false;
}
static bool parse_setext_underline(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
if (valid_symbols[SETEXT_H1_UNDERLINE] &&
s->matched == s->open_blocks.size) {
mark_end(s, lexer);
while (lexer->lookahead == '=') {
advance(s, lexer);
}
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
lexer->result_symbol = SETEXT_H1_UNDERLINE;
mark_end(s, lexer);
return true;
}
}
return false;
}
static bool parse_plus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
if (s->indentation <= 3 &&
(valid_symbols[LIST_MARKER_PLUS] ||
valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT] ||
valid_symbols[PLUS_METADATA])) {
advance(s, lexer);
if (valid_symbols[PLUS_METADATA] && lexer->lookahead == '+') {
advance(s, lexer);
if (lexer->lookahead != '+') {
return false;
}
advance(s, lexer);
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead != '\n' && lexer->lookahead != '\r') {
return false;
}
for (;;) {
// advance over newline
if (lexer->lookahead == '\r') {
advance(s, lexer);
if (lexer->lookahead == '\n') {
advance(s, lexer);
}
} else {
advance(s, lexer);
}
// check for pluses
size_t plus_count = 0;
while (lexer->lookahead == '+') {
plus_count++;
advance(s, lexer);
}
if (plus_count == 3) {
// if exactly 3 check if next symbol (after eventual
// whitespace) is newline
while (lexer->lookahead == ' ' ||
lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
// if so also consume newline
if (lexer->lookahead == '\r') {
advance(s, lexer);
if (lexer->lookahead == '\n') {
advance(s, lexer);
}
} else {
advance(s, lexer);
}
mark_end(s, lexer);
lexer->result_symbol = PLUS_METADATA;
return true;
}
}
// otherwise consume rest of line
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
!lexer->eof(lexer)) {
advance(s, lexer);
}
// if end of file is reached, then this is not metadata
if (lexer->eof(lexer)) {
break;
}
}
} else {
uint8_t extra_indentation = 0;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
extra_indentation += advance(s, lexer);
}
bool dont_interrupt = false;
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
extra_indentation = 1;
dont_interrupt = true;
}
dont_interrupt =
dont_interrupt && s->matched == s->open_blocks.size;
if (extra_indentation >= 1 &&
(dont_interrupt ? valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT]
: valid_symbols[LIST_MARKER_PLUS])) {
lexer->result_symbol = dont_interrupt
? LIST_MARKER_PLUS_DONT_INTERRUPT
: LIST_MARKER_PLUS;
extra_indentation--;
if (extra_indentation <= 3) {
extra_indentation += s->indentation;
s->indentation = 0;
} else {
uint8_t temp = s->indentation;
s->indentation = extra_indentation;
extra_indentation = temp;
}
if (!s->simulate)
push_block(s, (Block)(LIST_ITEM + extra_indentation));
return true;
}
}
}
return false;
}
static bool parse_ordered_list_marker(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
if (s->indentation <= 3 &&
(valid_symbols[LIST_MARKER_PARENTHESIS] ||
valid_symbols[LIST_MARKER_DOT] ||
valid_symbols[LIST_MARKER_PARENTHESIS_DONT_INTERRUPT] ||
valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT])) {
size_t digits = 1;
bool dont_interrupt = lexer->lookahead != '1';
advance(s, lexer);
while (isdigit(lexer->lookahead)) {
dont_interrupt = true;
digits++;
advance(s, lexer);
}
if (digits >= 1 && digits <= 9) {
bool dot = false;
bool parenthesis = false;
if (lexer->lookahead == '.') {
advance(s, lexer);
dot = true;
} else if (lexer->lookahead == ')') {
advance(s, lexer);
parenthesis = true;
}
if (dot || parenthesis) {
uint8_t extra_indentation = 0;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
extra_indentation += advance(s, lexer);
}
bool line_end =
lexer->lookahead == '\n' || lexer->lookahead == '\r';
if (line_end) {
extra_indentation = 1;
dont_interrupt = true;
}
dont_interrupt =
dont_interrupt && s->matched == s->open_blocks.size;
if (extra_indentation >= 1 &&
(dot ? (dont_interrupt
? valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT]
: valid_symbols[LIST_MARKER_DOT])
: (dont_interrupt
? valid_symbols
[LIST_MARKER_PARENTHESIS_DONT_INTERRUPT]
: valid_symbols[LIST_MARKER_PARENTHESIS]))) {
lexer->result_symbol =
dot ? LIST_MARKER_DOT : LIST_MARKER_PARENTHESIS;
extra_indentation--;
if (extra_indentation <= 3) {
extra_indentation += s->indentation;
s->indentation = 0;
} else {
uint8_t temp = s->indentation;
s->indentation = extra_indentation;
extra_indentation = temp;
}
if (!s->simulate)
push_block(
s, (Block)(LIST_ITEM + extra_indentation + digits));
return true;
}
}
}
}
return false;
}
static bool parse_minus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
if (s->indentation <= 3 &&
(valid_symbols[LIST_MARKER_MINUS] ||
valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT] ||
valid_symbols[SETEXT_H2_UNDERLINE] || valid_symbols[THEMATIC_BREAK] ||
valid_symbols[MINUS_METADATA])) {
mark_end(s, lexer);
bool whitespace_after_minus = false;
bool minus_after_whitespace = false;
size_t minus_count = 0;
uint8_t extra_indentation = 0;
for (;;) {
if (lexer->lookahead == '-') {
if (minus_count == 1 && extra_indentation >= 1) {
mark_end(s, lexer);
}
minus_count++;
advance(s, lexer);
minus_after_whitespace = whitespace_after_minus;
} else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (minus_count == 1) {
extra_indentation += advance(s, lexer);
} else {
advance(s, lexer);
}
whitespace_after_minus = true;
} else {
break;
}
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
bool dont_interrupt = false;
if (minus_count == 1 && line_end) {
extra_indentation = 1;
dont_interrupt = true;
}
dont_interrupt = dont_interrupt && s->matched == s->open_blocks.size;
bool thematic_break = minus_count >= 3 && line_end;
bool underline =
minus_count >= 1 && !minus_after_whitespace && line_end &&
s->matched ==
s->open_blocks
.size; // setext heading can not break lazy continuation
bool list_marker_minus = minus_count >= 1 && extra_indentation >= 1;
bool success = false;
if (valid_symbols[SETEXT_H2_UNDERLINE] && underline) {
lexer->result_symbol = SETEXT_H2_UNDERLINE;
mark_end(s, lexer);
s->indentation = 0;
success = true;
} else if (valid_symbols[THEMATIC_BREAK] &&
thematic_break) { // underline is false if list_marker_minus
// is true
lexer->result_symbol = THEMATIC_BREAK;
mark_end(s, lexer);
s->indentation = 0;
success = true;
} else if ((dont_interrupt
? valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT]
: valid_symbols[LIST_MARKER_MINUS]) &&
list_marker_minus) {
if (minus_count == 1) {
mark_end(s, lexer);
}
extra_indentation--;
if (extra_indentation <= 3) {
extra_indentation += s->indentation;
s->indentation = 0;
} else {
uint8_t temp = s->indentation;
s->indentation = extra_indentation;
extra_indentation = temp;
}
if (!s->simulate)
push_block(s, (Block)(LIST_ITEM + extra_indentation));
lexer->result_symbol = dont_interrupt
? LIST_MARKER_MINUS_DONT_INTERRUPT
: LIST_MARKER_MINUS;
return true;
}
if (minus_count == 3 && (!minus_after_whitespace) && line_end &&
valid_symbols[MINUS_METADATA]) {
for (;;) {
// advance over newline
if (lexer->lookahead == '\r') {
advance(s, lexer);
if (lexer->lookahead == '\n') {
advance(s, lexer);
}
} else {
advance(s, lexer);
}
// check for minuses
minus_count = 0;
while (lexer->lookahead == '-') {
minus_count++;
advance(s, lexer);
}
if (minus_count == 3) {
// if exactly 3 check if next symbol (after eventual
// whitespace) is newline
while (lexer->lookahead == ' ' ||
lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
// if so also consume newline
if (lexer->lookahead == '\r') {
advance(s, lexer);
if (lexer->lookahead == '\n') {
advance(s, lexer);
}
} else {
advance(s, lexer);
}
mark_end(s, lexer);
lexer->result_symbol = MINUS_METADATA;
return true;
}
}
// otherwise consume rest of line
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
!lexer->eof(lexer)) {
advance(s, lexer);
}
// if end of file is reached, then this is not metadata
if (lexer->eof(lexer)) {
break;
}
}
}
if (success) {
return true;
}
}
return false;
}
static bool parse_html_block(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
if (!(valid_symbols[HTML_BLOCK_1_START] ||
valid_symbols[HTML_BLOCK_1_END] ||
valid_symbols[HTML_BLOCK_2_START] ||
valid_symbols[HTML_BLOCK_3_START] ||
valid_symbols[HTML_BLOCK_4_START] ||
valid_symbols[HTML_BLOCK_5_START] ||
valid_symbols[HTML_BLOCK_6_START] ||
valid_symbols[HTML_BLOCK_7_START])) {
return false;
}
advance(s, lexer);
if (lexer->lookahead == '?' && valid_symbols[HTML_BLOCK_3_START]) {
advance(s, lexer);
lexer->result_symbol = HTML_BLOCK_3_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
}
if (lexer->lookahead == '!') {
// could be block 2
advance(s, lexer);
if (lexer->lookahead == '-') {
advance(s, lexer);
if (lexer->lookahead == '-' && valid_symbols[HTML_BLOCK_2_START]) {
advance(s, lexer);
lexer->result_symbol = HTML_BLOCK_2_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
}
} else if ('A' <= lexer->lookahead && lexer->lookahead <= 'Z' &&
valid_symbols[HTML_BLOCK_4_START]) {
advance(s, lexer);
lexer->result_symbol = HTML_BLOCK_4_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
} else if (lexer->lookahead == '[') {
advance(s, lexer);
if (lexer->lookahead == 'C') {
advance(s, lexer);
if (lexer->lookahead == 'D') {
advance(s, lexer);
if (lexer->lookahead == 'A') {
advance(s, lexer);
if (lexer->lookahead == 'T') {
advance(s, lexer);
if (lexer->lookahead == 'A') {
advance(s, lexer);
if (lexer->lookahead == '[' &&
valid_symbols[HTML_BLOCK_5_START]) {
advance(s, lexer);
lexer->result_symbol = HTML_BLOCK_5_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
}
}
}
}
}
}
}
}
bool starting_slash = lexer->lookahead == '/';
if (starting_slash) {
advance(s, lexer);
}
char name[11];
size_t name_length = 0;
while (iswalpha((wint_t)lexer->lookahead)) {
if (name_length < 10) {
name[name_length++] = (char)towlower((wint_t)lexer->lookahead);
} else {
name_length = 12;
}
advance(s, lexer);
}
if (name_length == 0) {
return false;
}
bool tag_closed = false;
if (name_length < 11) {
name[name_length] = 0;
bool next_symbol_valid =
lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->lookahead == '>';
if (next_symbol_valid) {
// try block 1 names
for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_1; i++) {
if (strcmp(name, HTML_TAG_NAMES_RULE_1[i]) == 0) {
if (starting_slash) {
if (valid_symbols[HTML_BLOCK_1_END]) {
lexer->result_symbol = HTML_BLOCK_1_END;
return true;
}
} else if (valid_symbols[HTML_BLOCK_1_START]) {
lexer->result_symbol = HTML_BLOCK_1_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
}
}
}
}
if (!next_symbol_valid && lexer->lookahead == '/') {
advance(s, lexer);
if (lexer->lookahead == '>') {
advance(s, lexer);
tag_closed = true;
}
}
if (next_symbol_valid || tag_closed) {
// try block 2 names
for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_7; i++) {
if (strcmp(name, HTML_TAG_NAMES_RULE_7[i]) == 0 &&
valid_symbols[HTML_BLOCK_6_START]) {
lexer->result_symbol = HTML_BLOCK_6_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
}
}
}
}
if (!valid_symbols[HTML_BLOCK_7_START]) {
return false;
}
if (!tag_closed) {
// tag name (continued)
while (iswalnum((wint_t)lexer->lookahead) || lexer->lookahead == '-') {
advance(s, lexer);
}
if (!starting_slash) {
// attributes
bool had_whitespace = false;
for (;;) {
// whitespace
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
had_whitespace = true;
advance(s, lexer);
}
if (lexer->lookahead == '/') {
advance(s, lexer);
break;
}
if (lexer->lookahead == '>') {
break;
}
// attribute name
if (!had_whitespace) {
return false;
}
if (!iswalpha((wint_t)lexer->lookahead) &&
lexer->lookahead != '_' && lexer->lookahead != ':') {
return false;
}
had_whitespace = false;
advance(s, lexer);
while (iswalnum((wint_t)lexer->lookahead) ||
lexer->lookahead == '_' || lexer->lookahead == '.' ||
lexer->lookahead == ':' || lexer->lookahead == '-') {
advance(s, lexer);
}
// attribute value specification
// optional whitespace
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
had_whitespace = true;
advance(s, lexer);
}
// =
if (lexer->lookahead == '=') {
advance(s, lexer);
had_whitespace = false;
// optional whitespace
while (lexer->lookahead == ' ' ||
lexer->lookahead == '\t') {
advance(s, lexer);
}
// attribute value
if (lexer->lookahead == '\'' || lexer->lookahead == '"') {
char delimiter = (char)lexer->lookahead;
advance(s, lexer);
while (lexer->lookahead != delimiter &&
lexer->lookahead != '\n' &&
lexer->lookahead != '\r' && !lexer->eof(lexer)) {
advance(s, lexer);
}
if (lexer->lookahead != delimiter) {
return false;
}
advance(s, lexer);
} else {
// unquoted attribute value
bool had_one = false;
while (lexer->lookahead != ' ' &&
lexer->lookahead != '\t' &&
lexer->lookahead != '"' &&
lexer->lookahead != '\'' &&
lexer->lookahead != '=' &&
lexer->lookahead != '<' &&
lexer->lookahead != '>' &&
lexer->lookahead != '`' &&
lexer->lookahead != '\n' &&
lexer->lookahead != '\r' && !lexer->eof(lexer)) {
advance(s, lexer);
had_one = true;
}
if (!had_one) {
return false;
}
}
}
}
} else {
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
}
}
if (lexer->lookahead != '>') {
return false;
}
advance(s, lexer);
}
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
lexer->result_symbol = HTML_BLOCK_7_START;
if (!s->simulate)
push_block(s, ANONYMOUS);
return true;
}
return false;
}
static bool parse_pipe_table(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
// unused
(void)(valid_symbols);
// PIPE_TABLE_START is zero width
mark_end(s, lexer);
// count number of cells
size_t cell_count = 0;
// also remember if we see starting and ending pipes, as empty headers have
// to have both
bool starting_pipe = false;
bool ending_pipe = false;
bool empty = true;
if (lexer->lookahead == '|') {
starting_pipe = true;
advance(s, lexer);
}
while (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
!lexer->eof(lexer)) {
if (lexer->lookahead == '|') {
cell_count++;
ending_pipe = true;
advance(s, lexer);
} else {
if (lexer->lookahead != ' ' && lexer->lookahead != '\t') {
ending_pipe = false;
}
if (lexer->lookahead == '\\') {
advance(s, lexer);
if (is_punctuation((char)lexer->lookahead)) {
advance(s, lexer);
}
} else {
advance(s, lexer);
}
}
}
if (empty && cell_count == 0 && !(starting_pipe && ending_pipe)) {
return false;
}
if (!ending_pipe) {
cell_count++;
}
// check the following line for a delimiter row
// parse a newline
if (lexer->lookahead == '\n') {
advance(s, lexer);
} else if (lexer->lookahead == '\r') {
advance(s, lexer);
if (lexer->lookahead == '\n') {
advance(s, lexer);
}
} else {
return false;
}
s->indentation = 0;
s->column = 0;
for (;;) {
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer);
} else {
break;
}
}
s->simulate = true;
uint8_t matched_temp = 0;
while (matched_temp < (uint8_t)s->open_blocks.size) {
if (match(s, lexer, s->open_blocks.items[matched_temp])) {
matched_temp++;
} else {
return false;
}
}
// check if delimiter row has the same number of cells and at least one pipe
size_t delimiter_cell_count = 0;
if (lexer->lookahead == '|') {
advance(s, lexer);
}
for (;;) {
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead == '|') {
delimiter_cell_count++;
advance(s, lexer);
continue;
}
if (lexer->lookahead == ':') {
advance(s, lexer);
if (lexer->lookahead != '-') {
return false;
}
}
bool had_one_minus = false;
while (lexer->lookahead == '-') {
had_one_minus = true;
advance(s, lexer);
}
if (had_one_minus) {
delimiter_cell_count++;
}
if (lexer->lookahead == ':') {
if (!had_one_minus) {
return false;
}
advance(s, lexer);
}
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(s, lexer);
}
if (lexer->lookahead == '|') {
if (!had_one_minus) {
delimiter_cell_count++;
}
advance(s, lexer);
continue;
}
if (lexer->lookahead != '\r' && lexer->lookahead != '\n') {
return false;
} else {
break;
}
}
// if the cell counts are not equal then this is not a table
if (cell_count != delimiter_cell_count) {
return false;
}
lexer->result_symbol = PIPE_TABLE_START;
return true;
}
static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
// A normal tree-sitter rule decided that the current branch is invalid and
// now "requests" an error to stop the branch
if (valid_symbols[TRIGGER_ERROR]) {
return error(lexer);
}
// Close the inner most block after the next line break as requested. See
// `$._close_block` in grammar.js
if (valid_symbols[CLOSE_BLOCK]) {
s->state |= STATE_CLOSE_BLOCK;
lexer->result_symbol = CLOSE_BLOCK;
return true;
}
// if we are at the end of the file and there are still open blocks close
// them all
if (lexer->eof(lexer)) {
if (valid_symbols[TOKEN_EOF]) {
lexer->result_symbol = TOKEN_EOF;
return true;
}
if (s->open_blocks.size > 0) {
lexer->result_symbol = BLOCK_CLOSE;
if (!s->simulate)
pop_block(s);
return true;
}
return false;
}
if (!(s->state & STATE_MATCHING)) {
// Parse any preceeding whitespace and remember its length. This makes a
// lot of parsing quite a bit easier.
for (;;) {
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer);
} else {
break;
}
}
// We are not matching. This is where the parsing logic for most
// "normal" token is. Most importantly parsing logic for the start of
// new blocks.
if (valid_symbols[INDENTED_CHUNK_START] &&
!valid_symbols[NO_INDENTED_CHUNK]) {
if (s->indentation >= 4 && lexer->lookahead != '\n' &&
lexer->lookahead != '\r') {
lexer->result_symbol = INDENTED_CHUNK_START;
if (!s->simulate)
push_block(s, INDENTED_CODE_BLOCK);
s->indentation -= 4;
return true;
}
}
// Decide which tokens to consider based on the first non-whitespace
// character
switch (lexer->lookahead) {
case '\r':
case '\n':
if (valid_symbols[BLANK_LINE_START]) {
// A blank line token is actually just 0 width, so do not
// consume the characters
lexer->result_symbol = BLANK_LINE_START;
return true;
}
break;
case '`':
// A backtick could mark the beginning or ending of a fenced
// code block.
return parse_fenced_code_block(s, '`', lexer, valid_symbols);
case '~':
// A tilde could mark the beginning or ending of a fenced code
// block.
return parse_fenced_code_block(s, '~', lexer, valid_symbols);
case '*':
// A star could either mark a list item or a thematic break.
// This code is similar to the code for '_' and '+'.
return parse_star(s, lexer, valid_symbols);
case '_':
return parse_thematic_break_underscore(s, lexer, valid_symbols);
case '>':
// A '>' could mark the beginning of a block quote
return parse_block_quote(s, lexer, valid_symbols);
case '#':
// A '#' could mark a atx heading
return parse_atx_heading(s, lexer, valid_symbols);
case '=':
// A '=' could mark a setext underline
return parse_setext_underline(s, lexer, valid_symbols);
case '+':
// A '+' could be a list marker
return parse_plus(s, lexer, valid_symbols);
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// A number could be a list marker (if followed by a dot or a
// parenthesis)
return parse_ordered_list_marker(s, lexer, valid_symbols);
case '-':
// A minus could mark a list marker, a thematic break or a
// setext underline
return parse_minus(s, lexer, valid_symbols);
case '<':
// A < could mark the beginning of a html block
return parse_html_block(s, lexer, valid_symbols);
}
if (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
valid_symbols[PIPE_TABLE_START]) {
return parse_pipe_table(s, lexer, valid_symbols);
}
} else { // we are in the state of trying to match all currently open blocks
bool partial_success = false;
while (s->matched < (uint8_t)s->open_blocks.size) {
if (s->matched == (uint8_t)s->open_blocks.size - 1 &&
(s->state & STATE_CLOSE_BLOCK)) {
if (!partial_success)
s->state &= ~STATE_CLOSE_BLOCK;
break;
}
if (match(s, lexer, s->open_blocks.items[s->matched])) {
partial_success = true;
s->matched++;
} else {
if (s->state & STATE_WAS_SOFT_LINE_BREAK) {
s->state &= (~STATE_MATCHING);
}
break;
}
}
if (partial_success) {
if (s->matched == s->open_blocks.size) {
s->state &= (~STATE_MATCHING);
}
lexer->result_symbol = BLOCK_CONTINUATION;
return true;
}
if (!(s->state & STATE_WAS_SOFT_LINE_BREAK)) {
lexer->result_symbol = BLOCK_CLOSE;
pop_block(s);
if (s->matched == s->open_blocks.size) {
s->state &= (~STATE_MATCHING);
}
return true;
}
}
// The parser just encountered a line break. Setup the state correspondingly
if ((valid_symbols[LINE_ENDING] || valid_symbols[SOFT_LINE_ENDING] ||
valid_symbols[PIPE_TABLE_LINE_ENDING]) &&
(lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
if (lexer->lookahead == '\r') {
advance(s, lexer);
if (lexer->lookahead == '\n') {
advance(s, lexer);
}
} else {
advance(s, lexer);
}
s->indentation = 0;
s->column = 0;
if (!(s->state & STATE_CLOSE_BLOCK) &&
(valid_symbols[SOFT_LINE_ENDING] ||
valid_symbols[PIPE_TABLE_LINE_ENDING])) {
lexer->mark_end(lexer);
for (;;) {
if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
s->indentation += advance(s, lexer);
} else {
break;
}
}
s->simulate = true;
uint8_t matched_temp = s->matched;
s->matched = 0;
bool one_will_be_matched = false;
while (s->matched < (uint8_t)s->open_blocks.size) {
if (match(s, lexer, s->open_blocks.items[s->matched])) {
s->matched++;
one_will_be_matched = true;
} else {
break;
}
}
bool all_will_be_matched = s->matched == s->open_blocks.size;
if (!lexer->eof(lexer) &&
!scan(s, lexer, paragraph_interrupt_symbols)) {
s->matched = matched_temp;
// If the last line break ended a paragraph and no new block
// opened, the last line break should have been a soft line
// break Reset the counter for matched blocks
s->matched = 0;
s->indentation = 0;
s->column = 0;
// If there is at least one open block, we should be in the
// matching state. Also set the matching flag if a
// `$._soft_line_break_marker` can be emitted so it does get
// emitted.
if (one_will_be_matched) {
s->state |= STATE_MATCHING;
} else {
s->state &= (~STATE_MATCHING);
}
if (valid_symbols[PIPE_TABLE_LINE_ENDING]) {
if (all_will_be_matched) {
lexer->result_symbol = PIPE_TABLE_LINE_ENDING;
return true;
}
} else {
lexer->result_symbol = SOFT_LINE_ENDING;
// reset some state variables
s->state |= STATE_WAS_SOFT_LINE_BREAK;
return true;
}
} else {
s->matched = matched_temp;
}
s->indentation = 0;
s->column = 0;
}
if (valid_symbols[LINE_ENDING]) {
// If the last line break ended a paragraph and no new block opened,
// the last line break should have been a soft line break Reset the
// counter for matched blocks
s->matched = 0;
// If there is at least one open block, we should be in the matching
// state. Also set the matching flag if a
// `$._soft_line_break_marker` can be emitted so it does get
// emitted.
if (s->open_blocks.size > 0) {
s->state |= STATE_MATCHING;
} else {
s->state &= (~STATE_MATCHING);
}
// reset some state variables
s->state &= (~STATE_WAS_SOFT_LINE_BREAK);
lexer->result_symbol = LINE_ENDING;
return true;
}
}
return false;
}
void *tree_sitter_markdown_external_scanner_create(void) {
Scanner *s = (Scanner *)malloc(sizeof(Scanner));
s->open_blocks.items = (Block *)calloc(1, sizeof(Block));
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
_Static_assert(ATX_H6_MARKER == ATX_H1_MARKER + 5, "");
#else
assert(ATX_H6_MARKER == ATX_H1_MARKER + 5);
#endif
deserialize(s, NULL, 0);
return s;
}
bool tree_sitter_markdown_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
scanner->simulate = false;
return scan(scanner, lexer, valid_symbols);
}
unsigned tree_sitter_markdown_external_scanner_serialize(void *payload,
char *buffer) {
Scanner *scanner = (Scanner *)payload;
return serialize(scanner, buffer);
}
void tree_sitter_markdown_external_scanner_deserialize(void *payload,
char *buffer,
unsigned length) {
Scanner *scanner = (Scanner *)payload;
deserialize(scanner, buffer, length);
}
void tree_sitter_markdown_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
free(scanner->open_blocks.items);
free(scanner);
}