File: //home/ubuntu/neovim/.deps/build/src/treesitter_markdown/bindings/rust/lib.rs
//! This crate provides markdown language support for the [tree-sitter][] parsing library.
//!
//! It contains two grammars: [`language`] to parse the block structure of markdown documents and
//! [`inline_language`] to parse inline content.
//!
//! It also supplies [`MarkdownParser`] as a convenience wrapper around the two grammars.
//! [`MarkdownParser::parse`] returns a [`MarkdownTree`] instead of a [`Tree`][Tree]. This struct
//! contains a block tree and an inline tree for each node in the block tree that has inline
//! content.
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [Tree]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Tree.html
//! [tree-sitter]: https://tree-sitter.github.io/
use std::{collections::HashMap, num::NonZeroU16};
use tree_sitter::{InputEdit, Language, Node, Parser, Point, Range, Tree, TreeCursor};
extern "C" {
// Foreign entry points, one per grammar; each returns that grammar's `Language`.
// NOTE(review): presumably defined by the generated C parsers linked into this
// crate — confirm against the build script.
fn tree_sitter_markdown() -> Language;
fn tree_sitter_markdown_inline() -> Language;
}
/// Get the tree-sitter [Language][] for the block grammar.
///
/// This is a thin safe wrapper around the foreign `tree_sitter_markdown` function.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
// SAFETY: NOTE(review) — presumably sound because `tree_sitter_markdown` takes no
// arguments and is provided by the C parser linked into this crate; confirm
// against the build script.
unsafe { tree_sitter_markdown() }
}
/// Get the tree-sitter [Language][] for the inline grammar.
///
/// This is a thin safe wrapper around the foreign `tree_sitter_markdown_inline` function.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn inline_language() -> Language {
// SAFETY: NOTE(review) — presumably sound because `tree_sitter_markdown_inline` takes
// no arguments and is provided by the C parser linked into this crate; confirm
// against the build script.
unsafe { tree_sitter_markdown_inline() }
}
/// The content of the block grammar's `queries/highlights.scm` file, embedded at compile time.
pub const HIGHLIGHT_QUERY_BLOCK: &str =
include_str!("../../tree-sitter-markdown/queries/highlights.scm");
/// The content of the block grammar's `queries/injections.scm` file, embedded at compile time.
pub const INJECTION_QUERY_BLOCK: &str =
include_str!("../../tree-sitter-markdown/queries/injections.scm");
/// The content of the inline grammar's `queries/highlights.scm` file, embedded at compile time.
pub const HIGHLIGHT_QUERY_INLINE: &str =
include_str!("../../tree-sitter-markdown-inline/queries/highlights.scm");
/// The content of the inline grammar's `queries/injections.scm` file, embedded at compile time.
pub const INJECTION_QUERY_INLINE: &str =
include_str!("../../tree-sitter-markdown-inline/queries/injections.scm");
/// The content of the [`node-types.json`][] file for the block grammar.
///
/// The file is embedded into the crate at compile time.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES_BLOCK: &str = include_str!("../../tree-sitter-markdown/src/node-types.json");
/// The content of the [`node-types.json`][] file for the inline grammar.
///
/// The file is embedded into the crate at compile time.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES_INLINE: &str =
include_str!("../../tree-sitter-markdown-inline/src/node-types.json");
/// A parser that produces [`MarkdownTree`]s.
///
/// This is a convenience wrapper around [`language`] and [`inline_language`].
pub struct MarkdownParser {
// The single tree-sitter parser; it is switched between the two grammars while parsing.
parser: Parser,
// Grammar used for the block structure of the document.
block_language: Language,
// Grammar used for the inline content of block nodes.
inline_language: Language,
}
/// A stateful object for walking a [`MarkdownTree`] efficiently.
///
/// This exposes the same methods as [`TreeCursor`], but abstracts away the
/// double block / inline structure of [`MarkdownTree`].
pub struct MarkdownCursor<'a> {
// The tree being walked; used to look up the inline tree of a block node.
markdown_tree: &'a MarkdownTree,
// Position inside the block tree.
block_cursor: TreeCursor<'a>,
// Position inside an inline tree; `Some` only while the cursor is inside one.
inline_cursor: Option<TreeCursor<'a>>,
}
impl<'a> MarkdownCursor<'a> {
    /// Get the cursor's current [`Node`].
    ///
    /// While the cursor is inside an inline tree this is a node of that inline tree,
    /// otherwise it is a node of the block tree.
    pub fn node(&self) -> Node<'a> {
        match &self.inline_cursor {
            Some(cursor) => cursor.node(),
            None => self.block_cursor.node(),
        }
    }

    /// Returns `true` if the current node is from the [inline language](inline_language).
    ///
    /// This information is needed to handle "tree-sitter internal" data like
    /// [`field_id`](Self::field_id) correctly.
    pub fn is_inline(&self) -> bool {
        self.inline_cursor.is_some()
    }

    /// Get the numerical field id of this tree cursor's current node.
    ///
    /// You will need to call [`is_inline`](Self::is_inline) to find out if the
    /// current node is an inline or block node.
    ///
    /// See also [`field_name`](Self::field_name).
    pub fn field_id(&self) -> Option<NonZeroU16> {
        match &self.inline_cursor {
            Some(cursor) => cursor.field_id(),
            None => self.block_cursor.field_id(),
        }
    }

    /// Get the field name of this tree cursor's current node.
    ///
    /// You will need to call [`is_inline`](Self::is_inline) to find out if the
    /// current node is an inline or block node.
    pub fn field_name(&self) -> Option<&'static str> {
        match &self.inline_cursor {
            Some(cursor) => cursor.field_name(),
            None => self.block_cursor.field_name(),
        }
    }

    /// Switch to the inline tree attached to the current block node.
    ///
    /// Returns `true` and places the inline cursor on the root of that inline tree if the
    /// current block node is an `inline` or `pipe_table_cell` node with an inline tree.
    /// Otherwise the cursor is left untouched and `false` is returned.
    fn move_to_inline_tree(&mut self) -> bool {
        let node = self.block_cursor.node();
        if !matches!(node.kind(), "inline" | "pipe_table_cell") {
            return false;
        }
        match self.markdown_tree.inline_tree(&node) {
            Some(inline_tree) => {
                self.inline_cursor = Some(inline_tree.walk());
                true
            }
            None => false,
        }
    }

    /// Leave the inline tree; the cursor is then positioned by the block cursor again.
    fn move_to_block_tree(&mut self) {
        self.inline_cursor = None;
    }

    /// Move this cursor to the first child of its current node.
    ///
    /// This returns `true` if the cursor successfully moved, and returns `false` if there were no
    /// children.
    /// If the cursor is currently at a node in the block tree and it has an associated inline tree, it
    /// will descend into the inline tree.
    pub fn goto_first_child(&mut self) -> bool {
        if let Some(cursor) = &mut self.inline_cursor {
            return cursor.goto_first_child();
        }
        if !self.move_to_inline_tree() {
            return self.block_cursor.goto_first_child();
        }
        let moved = self
            .inline_cursor
            .as_mut()
            .map_or(false, |cursor| cursor.goto_first_child());
        if !moved {
            // Never leave the cursor on the root of an inline tree: fall back to the
            // block node it belongs to.
            self.move_to_block_tree();
        }
        moved
    }

    /// Move this cursor to the parent of its current node.
    ///
    /// This returns `true` if the cursor successfully moved, and returns `false` if there was no
    /// parent node (the cursor was already on the root node).
    /// If the cursor moves to the root node of an inline tree, it ascends to the associated
    /// node in the block tree. While inside an inline tree this always returns `true`.
    pub fn goto_parent(&mut self) -> bool {
        match &mut self.inline_cursor {
            Some(inline_cursor) => {
                // The descending methods never leave the cursor on an inline root, so
                // the result of this call is not needed.
                inline_cursor.goto_parent();
                if inline_cursor.node().parent().is_none() {
                    // The inline root stands for the block node that owns the inline tree.
                    self.move_to_block_tree();
                }
                true
            }
            None => self.block_cursor.goto_parent(),
        }
    }

    /// Move this cursor to the next sibling of its current node.
    ///
    /// This returns `true` if the cursor successfully moved, and returns `false` if there was no next
    /// sibling node.
    pub fn goto_next_sibling(&mut self) -> bool {
        match &mut self.inline_cursor {
            Some(inline_cursor) => inline_cursor.goto_next_sibling(),
            None => self.block_cursor.goto_next_sibling(),
        }
    }

    /// Move this cursor to the first child of its current node that extends beyond the given byte offset.
    ///
    /// This returns the index of the child node if one was found, and returns `None` if no such child was found.
    /// If the cursor is currently at a node in the block tree and it has an associated inline tree, it
    /// will descend into the inline tree. If the inline tree has no matching child the cursor
    /// stays on the block node.
    pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option<usize> {
        if let Some(cursor) = &mut self.inline_cursor {
            return cursor.goto_first_child_for_byte(index);
        }
        if !self.move_to_inline_tree() {
            return self.block_cursor.goto_first_child_for_byte(index);
        }
        let child = self
            .inline_cursor
            .as_mut()
            .and_then(|cursor| cursor.goto_first_child_for_byte(index));
        if child.is_none() {
            // Same recovery as `goto_first_child`: do not strand the cursor on the inline root.
            self.move_to_block_tree();
        }
        child
    }

    /// Move this cursor to the first child of its current node that extends beyond the given point.
    ///
    /// This returns the index of the child node if one was found, and returns `None` if no such child was found.
    /// If the cursor is currently at a node in the block tree and it has an associated inline tree, it
    /// will descend into the inline tree. If the inline tree has no matching child the cursor
    /// stays on the block node.
    pub fn goto_first_child_for_point(&mut self, index: Point) -> Option<usize> {
        if let Some(cursor) = &mut self.inline_cursor {
            return cursor.goto_first_child_for_point(index);
        }
        if !self.move_to_inline_tree() {
            return self.block_cursor.goto_first_child_for_point(index);
        }
        let child = self
            .inline_cursor
            .as_mut()
            .and_then(|cursor| cursor.goto_first_child_for_point(index));
        if child.is_none() {
            // Same recovery as `goto_first_child`: do not strand the cursor on the inline root.
            self.move_to_block_tree();
        }
        child
    }
}
/// The result of parsing a markdown document with [`MarkdownParser`].
///
/// It holds the block tree of the document together with the inline trees of the block
/// nodes that have inline content.
#[derive(Debug, Clone)]
pub struct MarkdownTree {
// Tree produced by the block grammar.
block_tree: Tree,
// Trees produced by the inline grammar, in the order they were parsed.
inline_trees: Vec<Tree>,
// Maps the id of a block node to the index of its tree in `inline_trees`.
inline_indices: HashMap<usize, usize>,
}
impl MarkdownTree {
    /// Edit the block tree and inline trees to keep them in sync with source code that has been
    /// edited.
    ///
    /// You must describe the edit both in terms of byte offsets and in terms of
    /// row/column coordinates. The same edit is applied to the block tree and to every
    /// inline tree.
    pub fn edit(&mut self, edit: &InputEdit) {
        self.block_tree.edit(edit);
        for inline_tree in &mut self.inline_trees {
            inline_tree.edit(edit);
        }
    }

    /// Returns the block tree for the parsed document.
    pub fn block_tree(&self) -> &Tree {
        &self.block_tree
    }

    /// Returns the inline tree for the given block node.
    ///
    /// Returns `None` if the given node does not have an associated inline tree. Inline trees
    /// are only recorded for `inline` and `pipe_table_cell` nodes of the block tree.
    pub fn inline_tree(&self, parent: &Node) -> Option<&Tree> {
        let index = *self.inline_indices.get(&parent.id())?;
        // Checked lookup: a stale index yields `None` instead of a panic.
        self.inline_trees.get(index)
    }

    /// Returns the list of all inline trees.
    pub fn inline_trees(&self) -> &[Tree] {
        &self.inline_trees
    }

    /// Create a new [`MarkdownCursor`] starting from the root of the block tree.
    pub fn walk(&self) -> MarkdownCursor<'_> {
        MarkdownCursor {
            markdown_tree: self,
            block_cursor: self.block_tree.walk(),
            inline_cursor: None,
        }
    }
}
impl Default for MarkdownParser {
    /// Creates a parser holding both grammars and a fresh tree-sitter [`Parser`].
    fn default() -> Self {
        // Fields are listed in the order the values are meant to be created.
        Self {
            block_language: language(),
            inline_language: inline_language(),
            parser: Parser::new(),
        }
    }
}
impl MarkdownParser {
/// Parse a slice of UTF8 text.
///
/// # Arguments:
/// * `text` The UTF8-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [MarkdownTree::edit].
///
/// Returns a [MarkdownTree] if parsing succeeded, or `None` if:
/// * The timeout set with [tree_sitter::Parser::set_timeout_micros] expired
/// * The cancellation flag set with [tree_sitter::Parser::set_cancellation_flag] was flipped
pub fn parse_with<T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>(
&mut self,
callback: &mut F,
old_tree: Option<&MarkdownTree>,
) -> Option<MarkdownTree> {
let MarkdownParser {
parser,
block_language,
inline_language,
} = self;
parser
.set_included_ranges(&[])
.expect("Can not set included ranges to whole document");
parser
.set_language(block_language)
.expect("Could not load block grammar");
let block_tree = parser.parse_with(callback, old_tree.map(|tree| &tree.block_tree))?;
let (mut inline_trees, mut inline_indices) = if let Some(old_tree) = old_tree {
let len = old_tree.inline_trees.len();
(Vec::with_capacity(len), HashMap::with_capacity(len))
} else {
(Vec::new(), HashMap::new())
};
parser
.set_language(inline_language)
.expect("Could not load inline grammar");
let mut tree_cursor = block_tree.walk();
let mut i = 0;
'outer: loop {
let node = loop {
let kind = tree_cursor.node().kind();
if kind == "inline" || kind == "pipe_table_cell" || !tree_cursor.goto_first_child()
{
while !tree_cursor.goto_next_sibling() {
if !tree_cursor.goto_parent() {
break 'outer;
}
}
}
let kind = tree_cursor.node().kind();
if kind == "inline" || kind == "pipe_table_cell" {
break tree_cursor.node();
}
};
let mut range = node.range();
let mut ranges = Vec::new();
if tree_cursor.goto_first_child() {
while tree_cursor.goto_next_sibling() {
if !tree_cursor.node().is_named() {
continue;
}
let child_range = tree_cursor.node().range();
ranges.push(Range {
start_byte: range.start_byte,
start_point: range.start_point,
end_byte: child_range.start_byte,
end_point: child_range.start_point,
});
range.start_byte = child_range.end_byte;
range.start_point = child_range.end_point;
}
tree_cursor.goto_parent();
}
ranges.push(range);
parser.set_included_ranges(&ranges).ok()?;
let inline_tree = parser.parse_with(
callback,
old_tree.and_then(|old_tree| old_tree.inline_trees.get(i)),
)?;
inline_trees.push(inline_tree);
inline_indices.insert(node.id(), i);
i += 1;
}
drop(tree_cursor);
inline_trees.shrink_to_fit();
inline_indices.shrink_to_fit();
Some(MarkdownTree {
block_tree,
inline_trees,
inline_indices,
})
}
/// Parse a slice of UTF8 text.
///
/// # Arguments:
/// * `text` The UTF8-encoded text to parse.
/// * `old_tree` A previous syntax tree parsed from the same document.
/// If the text of the document has changed since `old_tree` was
/// created, then you must edit `old_tree` to match the new text using
/// [MarkdownTree::edit].
///
/// Returns a [MarkdownTree] if parsing succeeded, or `None` if:
/// * The timeout set with [tree_sitter::Parser::set_timeout_micros] expired
/// * The cancellation flag set with [tree_sitter::Parser::set_cancellation_flag] was flipped
pub fn parse(&mut self, text: &[u8], old_tree: Option<&MarkdownTree>) -> Option<MarkdownTree> {
self.parse_with(&mut |byte, _| &text[byte..], old_tree)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both grammars can be loaded into a plain tree-sitter parser.
    #[test]
    fn can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&language())
            .expect("Error loading Markdown language");
        let mut inline_parser = tree_sitter::Parser::new();
        inline_parser
            .set_language(&inline_language())
            .expect("Error loading Markdown inline language");
    }

    /// The inline tree of a paragraph is available after a fresh parse and after an
    /// incremental re-parse.
    #[test]
    fn inline_ranges() {
        let code = "# title\n\nInline [content].\n";
        let mut parser = MarkdownParser::default();
        let mut tree = parser.parse(code.as_bytes(), None).unwrap();

        let section = tree.block_tree().root_node().child(0).unwrap();
        assert_eq!(section.kind(), "section");
        let heading = section.child(0).unwrap();
        assert_eq!(heading.kind(), "atx_heading");
        let paragraph = section.child(1).unwrap();
        assert_eq!(paragraph.kind(), "paragraph");
        let inline = paragraph.child(0).unwrap();
        assert_eq!(inline.kind(), "inline");
        assert_eq!(
            tree.inline_tree(&inline)
                .unwrap()
                .root_node()
                .child(0)
                .unwrap()
                .kind(),
            "shortcut_link"
        );

        // Replace the `t` of the heading with `T` and re-parse incrementally.
        let code = "# Title\n\nInline [content].\n";
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 3,
            new_end_byte: 3,
            start_position: Point { row: 0, column: 2 },
            old_end_position: Point { row: 0, column: 3 },
            new_end_position: Point { row: 0, column: 3 },
        });
        let tree = parser.parse(code.as_bytes(), Some(&tree)).unwrap();

        let section = tree.block_tree().root_node().child(0).unwrap();
        assert_eq!(section.kind(), "section");
        let heading = section.child(0).unwrap();
        assert_eq!(heading.kind(), "atx_heading");
        let paragraph = section.child(1).unwrap();
        assert_eq!(paragraph.kind(), "paragraph");
        let inline = paragraph.child(0).unwrap();
        assert_eq!(inline.kind(), "inline");
        assert_eq!(
            tree.inline_tree(&inline)
                .unwrap()
                .root_node()
                .named_child(0)
                .unwrap()
                .kind(),
            "shortcut_link"
        );
    }

    /// The cursor descends from the block tree into an inline tree and climbs back out.
    #[test]
    fn markdown_cursor() {
        let code = "# title\n\nInline [content].\n";
        let mut parser = MarkdownParser::default();
        let tree = parser.parse(code.as_bytes(), None).unwrap();
        let mut cursor = tree.walk();
        assert_eq!(cursor.node().kind(), "document");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "section");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "atx_heading");
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), "paragraph");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "inline");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "shortcut_link");
        assert!(cursor.goto_parent());
        assert!(cursor.goto_parent());
        assert!(cursor.goto_parent());
        assert!(cursor.goto_parent());
        assert_eq!(cursor.node().kind(), "document");
    }

    /// Table cells get an inline tree as well.
    #[test]
    fn table() {
        let code = "| foo |\n| --- |\n| *bar*|\n";
        let mut parser = MarkdownParser::default();
        let tree = parser.parse(code.as_bytes(), None).unwrap();
        let mut cursor = tree.walk();
        assert_eq!(cursor.node().kind(), "document");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "section");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "pipe_table");
        assert!(cursor.goto_first_child());
        assert!(cursor.goto_next_sibling());
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), "pipe_table_row");
        assert!(cursor.goto_first_child());
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), "pipe_table_cell");
        assert!(cursor.goto_first_child());
        assert_eq!(cursor.node().kind(), "emphasis");
    }
}