File: //home/ubuntu/neovim/.deps/build/src/treesitter_markdown/tree-sitter-markdown/grammar.js
// This grammar only concerns the block structure according to the CommonMark Spec
// (https://spec.commonmark.org/0.30/#blocks-and-inlines)
// For more information see README.md
/// <reference types="tree-sitter-cli/dsl" />
const common = require('../common/grammar.js');
const PRECEDENCE_LEVEL_LINK = common.PRECEDENCE_LEVEL_LINK;
const PUNCTUATION_CHARACTERS_REGEX = '!-/:-@\\[-`\\{-~';
const PUNCTUATION_CHARACTERS_ARRAY = [
'!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<',
'=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~'
];
module.exports = grammar({
name: 'markdown',
rules: {
document: $ => seq(
optional(choice(
common.EXTENSION_MINUS_METADATA ? $.minus_metadata : choice(),
common.EXTENSION_PLUS_METADATA ? $.plus_metadata : choice(),
)),
alias(prec.right(repeat($._block_not_section)), $.section),
repeat($.section),
),
...common.rules,
_last_token_punctuation: $ => choice(), // needed for compatability wiht common rules
// BLOCK STRUCTURE
// All blocks. Every block contains a trailing newline.
_block: $ => choice(
$._block_not_section,
$.section,
),
_block_not_section: $ => choice(
alias($._setext_heading1, $.setext_heading),
alias($._setext_heading2, $.setext_heading),
$.paragraph,
$.indented_code_block,
$.block_quote,
$.thematic_break,
$.list,
$.fenced_code_block,
$._blank_line,
$.html_block,
$.link_reference_definition,
common.EXTENSION_PIPE_TABLE ? $.pipe_table : choice(),
),
section: $ => choice($._section1, $._section2, $._section3, $._section4, $._section5, $._section6),
_section1: $ => prec.right(seq(
alias($._atx_heading1, $.atx_heading),
repeat(choice(
alias(choice($._section6, $._section5, $._section4, $._section3, $._section2), $.section),
$._block_not_section
))
)),
_section2: $ => prec.right(seq(
alias($._atx_heading2, $.atx_heading),
repeat(choice(
alias(choice($._section6, $._section5, $._section4, $._section3), $.section),
$._block_not_section
))
)),
_section3: $ => prec.right(seq(
alias($._atx_heading3, $.atx_heading),
repeat(choice(
alias(choice($._section6, $._section5, $._section4), $.section),
$._block_not_section
))
)),
_section4: $ => prec.right(seq(
alias($._atx_heading4, $.atx_heading),
repeat(choice(
alias(choice($._section6, $._section5), $.section),
$._block_not_section
))
)),
_section5: $ => prec.right(seq(
alias($._atx_heading5, $.atx_heading),
repeat(choice(
alias($._section6, $.section),
$._block_not_section
))
)),
_section6: $ => prec.right(seq(
alias($._atx_heading6, $.atx_heading),
repeat($._block_not_section)
)),
// LEAF BLOCKS
// A thematic break. This is currently handled by the external scanner but maybe could be
// parsed using normal tree-sitter rules.
//
// https://github.github.com/gfm/#thematic-breaks
thematic_break: $ => seq($._thematic_break, choice($._newline, $._eof)),
// An ATX heading. This is currently handled by the external scanner but maybe could be
// parsed using normal tree-sitter rules.
//
// https://github.github.com/gfm/#atx-headings
_atx_heading1: $ => prec(1, seq(
$.atx_h1_marker,
optional($._atx_heading_content),
$._newline
)),
_atx_heading2: $ => prec(1, seq(
$.atx_h2_marker,
optional($._atx_heading_content),
$._newline
)),
_atx_heading3: $ => prec(1, seq(
$.atx_h3_marker,
optional($._atx_heading_content),
$._newline
)),
_atx_heading4: $ => prec(1, seq(
$.atx_h4_marker,
optional($._atx_heading_content),
$._newline
)),
_atx_heading5: $ => prec(1, seq(
$.atx_h5_marker,
optional($._atx_heading_content),
$._newline
)),
_atx_heading6: $ => prec(1, seq(
$.atx_h6_marker,
optional($._atx_heading_content),
$._newline
)),
_atx_heading_content: $ => prec(1, seq(
optional($._whitespace),
field('heading_content', alias($._line, $.inline))
)),
// A setext heading. The underlines are currently handled by the external scanner but maybe
// could be parsed using normal tree-sitter rules.
//
// https://github.github.com/gfm/#setext-headings
_setext_heading1: $ => seq(
field('heading_content', $.paragraph),
$.setext_h1_underline,
choice($._newline, $._eof),
),
_setext_heading2: $ => seq(
field('heading_content', $.paragraph),
$.setext_h2_underline,
choice($._newline, $._eof),
),
// An indented code block. An indented code block is made up of indented chunks and blank
// lines. The indented chunks are handeled by the external scanner.
//
// https://github.github.com/gfm/#indented-code-blocks
indented_code_block: $ => prec.right(seq($._indented_chunk, repeat(choice($._indented_chunk, $._blank_line)))),
_indented_chunk: $ => seq($._indented_chunk_start, repeat(choice($._line, $._newline)), $._block_close, optional($.block_continuation)),
// A fenced code block. Fenced code blocks are mainly handled by the external scanner. In
// case of backtick code blocks the external scanner also checks that the info string is
// proper.
//
// https://github.github.com/gfm/#fenced-code-blocks
fenced_code_block: $ => prec.right(choice(
seq(
alias($._fenced_code_block_start_backtick, $.fenced_code_block_delimiter),
optional($._whitespace),
optional($.info_string),
$._newline,
optional($.code_fence_content),
optional(seq(alias($._fenced_code_block_end_backtick, $.fenced_code_block_delimiter), $._close_block, $._newline)),
$._block_close,
),
seq(
alias($._fenced_code_block_start_tilde, $.fenced_code_block_delimiter),
optional($._whitespace),
optional($.info_string),
$._newline,
optional($.code_fence_content),
optional(seq(alias($._fenced_code_block_end_tilde, $.fenced_code_block_delimiter), $._close_block, $._newline)),
$._block_close,
),
)),
code_fence_content: $ => repeat1(choice($._newline, $._line)),
info_string: $ => choice(
seq($.language, repeat(choice($._line, $.backslash_escape, $.entity_reference, $.numeric_character_reference))),
seq(
repeat1(choice('{', '}')),
optional(choice(
seq($.language, repeat(choice($._line, $.backslash_escape, $.entity_reference, $.numeric_character_reference))),
seq($._whitespace, repeat(choice($._line, $.backslash_escape, $.entity_reference, $.numeric_character_reference))),
))
)
),
language: $ => prec.right(repeat1(choice($._word, common.punctuation_without($, ['{', '}', ',']), $.backslash_escape, $.entity_reference, $.numeric_character_reference))),
// An HTML block. We do not emit addition nodes relating to the kind or structure or of the
// html block as this is best done using language injections and a proper html parsers.
//
// See the `build_html_block` function for more information.
// See the spec for the different kinds of html blocks.
//
// https://github.github.com/gfm/#html-blocks
html_block: $ => prec(1, seq(optional($._whitespace), choice(
$._html_block_1,
$._html_block_2,
$._html_block_3,
$._html_block_4,
$._html_block_5,
$._html_block_6,
$._html_block_7,
))),
_html_block_1: $ => build_html_block($,
// new RegExp(
// '[ \t]*<' + regex_case_insensitive_list(HTML_TAG_NAMES_RULE_1) + '([\\r\\n]|[ \\t>][^<\\r\\n]*(\\n|\\r\\n?)?)'
// ),
$._html_block_1_start,
$._html_block_1_end,
true
),
_html_block_2: $ => build_html_block($, $._html_block_2_start, '-->', true),
_html_block_3: $ => build_html_block($, $._html_block_3_start, '?>', true),
_html_block_4: $ => build_html_block($, $._html_block_4_start, '>', true),
_html_block_5: $ => build_html_block($, $._html_block_5_start, ']]>', true),
_html_block_6: $ => build_html_block(
$,
$._html_block_6_start,
seq($._newline, $._blank_line),
true
),
_html_block_7: $ => build_html_block(
$,
$._html_block_7_start,
seq($._newline, $._blank_line),
false
),
// A link reference definition. We need to make sure that this is not mistaken for a
// paragraph or indented chunk. The `$._no_indented_chunk` token is used to tell the
// external scanner not to allow indented chunks when the `$.link_title` of the link
// reference definition would be valid.
//
// https://github.github.com/gfm/#link-reference-definitions
link_reference_definition: $ => prec.dynamic(PRECEDENCE_LEVEL_LINK, seq(
optional($._whitespace),
$.link_label,
':',
optional(seq(optional($._whitespace), optional(seq($._soft_line_break, optional($._whitespace))))),
$.link_destination,
optional(prec.dynamic(2 * PRECEDENCE_LEVEL_LINK, seq(
choice(
seq($._whitespace, optional(seq($._soft_line_break, optional($._whitespace)))),
seq($._soft_line_break, optional($._whitespace)),
),
optional($._no_indented_chunk),
$.link_title
))),
choice($._newline, $._soft_line_break, $._eof),
)),
_text_inline_no_link: $ => choice($._word, $._whitespace, common.punctuation_without($, ['[', ']'])),
// A paragraph. The parsing tactic for deciding when a paragraph ends is as follows:
// on every newline inside a paragraph a conflict is triggered manually using
// `$._split_token` to split the parse state into two branches.
//
// One of them - the one that also contains a `$._soft_line_break_marker` will try to
// continue the paragraph, but we make sure that the beginning of a new block that can
// interrupt a paragraph can also be parsed. If this is the case we know that the paragraph
// should have been closed and the external parser will emit an `$._error` to kill the parse
// branch.
//
// The other parse branch consideres the paragraph to be over. It will be killed if no valid new
// block is detected before the next newline. (For example it will also be killed if a indented
// code block is detected, which cannot interrupt paragraphs).
//
// Either way, after the next newline only one branch will exist, so the ammount of branches
// related to paragraphs ending does not grow.
//
// https://github.github.com/gfm/#paragraphs
paragraph: $ => seq(alias(repeat1(choice($._line, $._soft_line_break)), $.inline), choice($._newline, $._eof)),
// A blank line including the following newline.
//
// https://github.github.com/gfm/#blank-lines
_blank_line: $ => seq($._blank_line_start, choice($._newline, $._eof)),
// CONTAINER BLOCKS
// A block quote. This is the most basic example of a container block handled by the
// external scanner.
//
// https://github.github.com/gfm/#block-quotes
block_quote: $ => seq(
alias($._block_quote_start, $.block_quote_marker),
optional($.block_continuation),
repeat($._block),
$._block_close,
optional($.block_continuation)
),
// A list. This grammar does not differentiate between loose and tight lists for efficiency
// reasons.
//
// Lists can only contain list items with list markers of the same type. List items are
// handled by the external scanner.
//
// https://github.github.com/gfm/#lists
list: $ => prec.right(choice(
$._list_plus,
$._list_minus,
$._list_star,
$._list_dot,
$._list_parenthesis
)),
_list_plus: $ => prec.right(repeat1(alias($._list_item_plus, $.list_item))),
_list_minus: $ => prec.right(repeat1(alias($._list_item_minus, $.list_item))),
_list_star: $ => prec.right(repeat1(alias($._list_item_star, $.list_item))),
_list_dot: $ => prec.right(repeat1(alias($._list_item_dot, $.list_item))),
_list_parenthesis: $ => prec.right(repeat1(alias($._list_item_parenthesis, $.list_item))),
// Some list items can not interrupt a paragraph and are marked as such by the external
// scanner.
list_marker_plus: $ => choice($._list_marker_plus, $._list_marker_plus_dont_interrupt),
list_marker_minus: $ => choice($._list_marker_minus, $._list_marker_minus_dont_interrupt),
list_marker_star: $ => choice($._list_marker_star, $._list_marker_star_dont_interrupt),
list_marker_dot: $ => choice($._list_marker_dot, $._list_marker_dot_dont_interrupt),
list_marker_parenthesis: $ => choice($._list_marker_parenthesis, $._list_marker_parenthesis_dont_interrupt),
_list_item_plus: $ => seq(
$.list_marker_plus,
optional($.block_continuation),
$._list_item_content,
$._block_close,
optional($.block_continuation)
),
_list_item_minus: $ => seq(
$.list_marker_minus,
optional($.block_continuation),
$._list_item_content,
$._block_close,
optional($.block_continuation)
),
_list_item_star: $ => seq(
$.list_marker_star,
optional($.block_continuation),
$._list_item_content,
$._block_close,
optional($.block_continuation)
),
_list_item_dot: $ => seq(
$.list_marker_dot,
optional($.block_continuation),
$._list_item_content,
$._block_close,
optional($.block_continuation)
),
_list_item_parenthesis: $ => seq(
$.list_marker_parenthesis,
optional($.block_continuation),
$._list_item_content,
$._block_close,
optional($.block_continuation)
),
// List items are closed after two consecutive blank lines
_list_item_content: $ => choice(
prec(1, seq(
$._blank_line,
$._blank_line,
$._close_block,
optional($.block_continuation)
)),
repeat1($._block),
common.EXTENSION_TASK_LIST ? prec(1, seq(
choice($.task_list_marker_checked, $.task_list_marker_unchecked),
$._whitespace,
$.paragraph,
repeat($._block)
)) : choice()
),
// Newlines as in the spec. Parsing a newline triggers the matching process by making
// the external parser emit a `$._line_ending`.
_newline: $ => seq(
$._line_ending,
optional($.block_continuation)
),
_soft_line_break: $ => seq(
$._soft_line_ending,
optional($.block_continuation)
),
// Some symbols get parsed as single tokens so that html blocks get detected properly
_line: $ => prec.right(repeat1(choice($._word, $._whitespace, common.punctuation_without($, [])))),
_word: $ => choice(
new RegExp('[^' + PUNCTUATION_CHARACTERS_REGEX + ' \\t\\n\\r]+'),
common.EXTENSION_TASK_LIST ? choice(
/\[[xX]\]/,
/\[[ \t]\]/,
) : choice()
),
// The external scanner emits some characters that should just be ignored.
_whitespace: $ => /[ \t]+/,
...(common.EXTENSION_TASK_LIST ? {
task_list_marker_checked: $ => prec(1, /\[[xX]\]/),
task_list_marker_unchecked: $ => prec(1, /\[[ \t]\]/),
} : {}),
...(common.EXTENSION_PIPE_TABLE ? {
pipe_table: $ => prec.right(seq(
$._pipe_table_start,
alias($.pipe_table_row, $.pipe_table_header),
$._newline,
$.pipe_table_delimiter_row,
repeat(seq($._pipe_table_newline, optional($.pipe_table_row))),
choice($._newline, $._eof),
)),
_pipe_table_newline: $ => seq(
$._pipe_table_line_ending,
optional($.block_continuation)
),
pipe_table_delimiter_row: $ => seq(
optional(seq(
optional($._whitespace),
'|',
)),
repeat1(prec.right(seq(
optional($._whitespace),
$.pipe_table_delimiter_cell,
optional($._whitespace),
'|',
))),
optional($._whitespace),
optional(seq(
$.pipe_table_delimiter_cell,
optional($._whitespace)
)),
),
pipe_table_delimiter_cell: $ => seq(
optional(alias(':', $.pipe_table_align_left)),
repeat1('-'),
optional(alias(':', $.pipe_table_align_right)),
),
pipe_table_row: $ => seq(
optional(seq(
optional($._whitespace),
'|',
)),
choice(
seq(
repeat1(prec.right(seq(
choice(
seq(
optional($._whitespace),
$.pipe_table_cell,
optional($._whitespace)
),
alias($._whitespace, $.pipe_table_cell)
),
'|',
))),
optional($._whitespace),
optional(seq(
$.pipe_table_cell,
optional($._whitespace)
)),
),
seq(
optional($._whitespace),
$.pipe_table_cell,
optional($._whitespace)
)
),
),
pipe_table_cell: $ => prec.right(seq(
choice(
$._word,
$._backslash_escape,
common.punctuation_without($, ['|']),
),
repeat(choice(
$._word,
$._whitespace,
$._backslash_escape,
common.punctuation_without($, ['|']),
)),
)),
} : {}),
},
externals: $ => [
// Quite a few of these tokens could maybe be implemented without use of the external parser.
// For this the `$._open_block` and `$._close_block` tokens should be used to tell the external
// parser to put a new anonymous block on the block stack.
// Block structure gets parsed as follows: After every newline (`$._line_ending`) we try to match
// as many open blocks as possible. For example if the last line was part of a block quote we look
// for a `>` at the beginning of the next line. We emit a `$.block_continuation` for each matched
// block. For this process the external scanner keeps a stack of currently open blocks.
//
// If we are not able to match all blocks that does not necessarily mean that all unmatched blocks
// have to be closed. It could also mean that the line is a lazy continuation line
// (https://github.github.com/gfm/#lazy-continuation-line, see also `$._split_token` and
// `$._soft_line_break_marker` below)
//
// If a block does get closed (because it was not matched or because some closing token was
// encountered) we emit a `$._block_close` token
$._line_ending, // this token does not contain the actual newline characters. see `$._newline`
$._soft_line_ending,
$._block_close,
$.block_continuation,
// Tokens signifying the start of a block. Blocks that do not need a `$._block_close` because they
// always span one line are marked as such.
$._block_quote_start,
$._indented_chunk_start,
$.atx_h1_marker, // atx headings do not need a `$._block_close`
$.atx_h2_marker,
$.atx_h3_marker,
$.atx_h4_marker,
$.atx_h5_marker,
$.atx_h6_marker,
$.setext_h1_underline, // setext headings do not need a `$._block_close`
$.setext_h2_underline,
$._thematic_break, // thematic breaks do not need a `$._block_close`
$._list_marker_minus,
$._list_marker_plus,
$._list_marker_star,
$._list_marker_parenthesis,
$._list_marker_dot,
$._list_marker_minus_dont_interrupt, // list items that do not interrupt an ongoing paragraph
$._list_marker_plus_dont_interrupt,
$._list_marker_star_dont_interrupt,
$._list_marker_parenthesis_dont_interrupt,
$._list_marker_dot_dont_interrupt,
$._fenced_code_block_start_backtick,
$._fenced_code_block_start_tilde,
$._blank_line_start, // Does not contain the newline characters. Blank lines do not need a `$._block_close`
// Special tokens for block structure
// Closing backticks or tildas for a fenced code block. They are used to trigger a `$._close_block`
// which in turn will trigger a `$._block_close` at the beginning the following line.
$._fenced_code_block_end_backtick,
$._fenced_code_block_end_tilde,
$._html_block_1_start,
$._html_block_1_end,
$._html_block_2_start,
$._html_block_3_start,
$._html_block_4_start,
$._html_block_5_start,
$._html_block_6_start,
$._html_block_7_start,
// Similarly this is used if the closing of a block is not decided by the external parser.
// A `$._block_close` will be emitted at the beginning of the next line. Notice that a
// `$._block_close` can also get emitted if the parent block closes.
$._close_block,
// This is a workaround so the external parser does not try to open indented blocks when
// parsing a link reference definition.
$._no_indented_chunk,
// An `$._error` token is never valid and gets emmited to kill invalid parse branches. Concretely
// this is used to decide wether a newline closes a paragraph and together and it gets emitted
// when trying to parse the `$._trigger_error` token in `$.link_title`.
$._error,
$._trigger_error,
$._eof,
$.minus_metadata,
$.plus_metadata,
$._pipe_table_start,
$._pipe_table_line_ending,
],
precedences: $ => [
[$._setext_heading1, $._block],
[$._setext_heading2, $._block],
[$.indented_code_block, $._block],
],
conflicts: $ => [
[$.link_reference_definition],
[$.link_label, $._line],
[$.link_reference_definition, $._line],
],
extras: $ => [],
});
// General purpose structure for html blocks. The different kinds mostly work the same but have
// different openling and closing conditions. Some html blocks may not interrupt a paragraph and
// have to be marked as such.
function build_html_block($, open, close, interrupt_paragraph) {
return seq(
open,
repeat(choice(
$._line,
$._newline,
seq(close, $._close_block),
)),
$._block_close,
optional($.block_continuation),
);
}