HEX
Server: Apache/2.4.41 (Ubuntu)
System: Linux ip-172-31-42-149 5.15.0-1084-aws #91~20.04.1-Ubuntu SMP Fri May 2 07:00:04 UTC 2025 aarch64
User: ubuntu (1000)
PHP: 7.4.33
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare,
Upload Files
File: //home/ubuntu/neovim/.deps/build/src/treesitter/script/generate-unicode-categories-json
#!/usr/bin/env node

// Builds the JSON tables that the CLI uses to handle unicode property escapes.

// Destinations of the generated tables; they all live in the same directory.
const OUTPUT_DIR = './cli/src/generate/prepare_grammar';
const CATEGORY_OUTPUT_PATH = `${OUTPUT_DIR}/unicode-categories.json`;
const PROPERTY_OUTPUT_PATH = `${OUTPUT_DIR}/unicode-properties.json`;
const CATEGORY_ALIAS_OUTPUT_PATH = `${OUTPUT_DIR}/unicode-category-aliases.json`;
const PROPERTY_ALIAS_OUTPUT_PATH = `${OUTPUT_DIR}/unicode-property-aliases.json`;

// Every input file comes from the same versioned directory on unicode.org.
const UNICODE_STANDARD_VERSION = '15.1.0';
const UCD_BASE_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd`;
const CATEGORY_URL = `${UCD_BASE_URL}/UnicodeData.txt`;
const PROPERTY_URL = `${UCD_BASE_URL}/PropList.txt`;
const DERIVED_PROPERTY_URL = `${UCD_BASE_URL}/DerivedCoreProperties.txt`;
const CATEGORY_ALIAS_URL = `${UCD_BASE_URL}/PropertyValueAliases.txt`;
const PROPERTY_ALIAS_URL = `${UCD_BASE_URL}/PropertyAliases.txt`;
const EMOJI_DATA_URL = `${UCD_BASE_URL}/emoji/emoji-data.txt`;

const fs = require('fs');
const path = require('path');
const { spawnSync } = require('child_process');

// Fetch the unicode data files one after another, in this order. cachedDownload
// keeps a copy of each file inside the 'target' directory.
const [
  categoryData,
  propertyData,
  derivedPropertyData,
  categoryAliasData,
  propertyAliasData,
  emojiData,
] = [
  CATEGORY_URL,
  PROPERTY_URL,
  DERIVED_PROPERTY_URL,
  CATEGORY_ALIAS_URL,
  PROPERTY_ALIAS_URL,
  EMOJI_DATA_URL,
].map((url) => cachedDownload(url));
/**
 * Return the contents of `url`, fetching it with curl on first use and caching
 * the result under `./target`.
 *
 * The cache file is named after the last path segment of the URL with the
 * Unicode version appended, so bumping UNICODE_STANDARD_VERSION invalidates
 * earlier downloads.
 *
 * @param {string} url - Location of the file to fetch.
 * @returns {string} The file contents, decoded as UTF-8.
 * @throws {Error} If curl cannot be started, exits with a non-zero status, or
 *   produces no output. Nothing is written to the cache in those cases.
 */
function cachedDownload(url) {
  const cacheDir = path.join('.', 'target');
  const downloadPath = path.join(cacheDir, `${path.basename(url)}.${UNICODE_STANDARD_VERSION}`);

  if (fs.existsSync(downloadPath)) {
    const cached = fs.readFileSync(downloadPath, 'utf8');
    // An empty cache entry carries no data (e.g. it was left behind by a
    // fetch that failed), so fall through and download again.
    if (cached.length > 0) {
      console.log(`Using cached ${downloadPath}`);
      return cached;
    }
  }

  console.log(`Downloading ${url}`);
  const result = spawnSync(
    'curl',
    // --fail turns HTTP errors into a non-zero exit status instead of handing
    // back the server's error page as if it were the requested file.
    ['--fail', '--silent', '--show-error', '--location', url],
    // spawnSync stops the child once its output exceeds maxBuffer; raise the
    // limit explicitly so multi-megabyte data files are not cut short.
    { encoding: 'utf8', maxBuffer: 64 * 1024 * 1024 },
  );

  if (result.error) {
    throw new Error(`Failed to run curl for ${url}: ${result.error.message}`);
  }
  if (result.status !== 0) {
    const detail = (result.stderr || '').trim();
    const suffix = detail ? `: ${detail}` : '';
    throw new Error(`curl exited with status ${result.status} for ${url}${suffix}`);
  }
  if (!result.stdout) {
    throw new Error(`Downloaded no data from ${url}`);
  }

  // The cache directory may not exist yet (e.g. in a fresh checkout).
  fs.mkdirSync(cacheDir, { recursive: true });
  fs.writeFileSync(downloadPath, result.stdout, 'utf8');
  return result.stdout;
}

// Tables filled in by the parsing passes below and serialized at the end of
// the script.
const categories = {};
const properties = {};
const categoryAliases = {};
const propertyAliases = {};
// Scratch state shared by the parsing passes: the text being scanned, the
// current 1-based line number, and the bounds of the current line.
let data, row, lineStart, lineEnd;

// Parse the properties.
// A data line has the form `<code point or range> ; <property> # <comment>`.
// The files are joined with an explicit newline so that a file that does not
// end in one cannot fuse its last line with the first line of the next file.
data = [propertyData, derivedPropertyData, emojiData].join('\n');
row = 0;
const CODE_POINT = /[0-9A-Fa-f]/;
for (const line of data.split('\n')) {
  row++;

  // Skip over blank and comment lines: a data line is recognised by a leading
  // hex digit. charAt() yields '' for an empty line, which never matches.
  if (!CODE_POINT.test(line.charAt(0))) continue;

  // Work on the current line only. The trailing comment is optional; when it
  // is absent the property runs to the end of the line.
  const commentStart = line.indexOf('#');
  const content = commentStart === -1 ? line : line.slice(0, commentStart);

  // Split at the first semicolon:
  // * code point or code point range
  // * property (everything up to the comment, kept verbatim)
  const codePointEnd = content.indexOf(';');
  if (codePointEnd === -1) {
    throw new Error(`Unexpected format on line ${row}`);
  }

  // Process ranges (separated by '..'); a single code point becomes a range
  // of length one.
  const codePoints = content.slice(0, codePointEnd).trim()
    .split('..')
    .map(p => parseInt(p, 16));
  if (codePoints.length === 1) {
    codePoints.push(codePoints[0]);
  }

  const property = content.slice(codePointEnd + 1).trim();

  if (
    codePoints.length !== 2 ||
    codePoints.some(c => Number.isNaN(c)) ||
    codePoints[0] > codePoints[1] ||
    property === ''
  ) {
    throw new Error(`Unexpected format on line ${row}`);
  }

  console.log("Property:", codePoints, property);

  // Group the code points by their property.
  if (!properties[property]) {
    properties[property] = [];
  }
  const bucket = properties[property];
  for (let c = codePoints[0]; c <= codePoints[1]; c++) {
    bucket.push(c);
  }
}

// Parse the categories.
// Each line normally represents one code point. Large blocks are abbreviated
// as a pair of lines whose names look like `<Some Block, First>` and
// `<Some Block, Last>`; every code point between the two endpoints has the
// same category and is expanded here so that it is not lost.
data = categoryData;
row = 0;
const RANGE_MARKER = /^<(.+), (First|Last)>$/;
// Set while a `First` line is waiting for its matching `Last` line.
let openRange = null;
for (const line of data.split('\n')) {
  row++;

  // Tolerate blank lines (including the empty remainder after the final newline).
  if (line.trim() === '') continue;

  // Parse the first three semicolon-separated fields:
  // * code point (hexadecimal)
  // * name
  // * category
  // The category must itself be terminated by a semicolon, hence four parts.
  const fields = line.split(';');
  if (fields.length < 4) {
    throw new Error(`Unexpected format on line ${row}`);
  }

  const codePoint = parseInt(fields[0], 16);
  const name = fields[1];
  const category = fields[2];
  if (Number.isNaN(codePoint) || category === '') {
    throw new Error(`Unexpected format on line ${row}`);
  }

  console.log("Category:", codePoint, category, name);

  // Group the code points by their category.
  if (!categories[category]) {
    categories[category] = [];
  }
  const bucket = categories[category];

  const marker = RANGE_MARKER.exec(name);
  if (marker !== null && marker[2] === 'First') {
    if (openRange !== null) {
      throw new Error(`Unterminated code point range before line ${row}`);
    }
    openRange = { start: codePoint, label: marker[1], category };
    bucket.push(codePoint);
    continue;
  }

  if (marker !== null && marker[2] === 'Last') {
    if (
      openRange === null ||
      openRange.label !== marker[1] ||
      openRange.category !== category ||
      codePoint < openRange.start
    ) {
      throw new Error(`Unexpected end of code point range on line ${row}`);
    }
    // The start of the range was recorded when its `First` line was seen.
    for (let c = openRange.start + 1; c <= codePoint; c++) {
      bucket.push(c);
    }
    openRange = null;
    continue;
  }

  if (openRange !== null) {
    throw new Error(`Unterminated code point range before line ${row}`);
  }
  bucket.push(codePoint);
}
if (openRange !== null) {
  throw new Error('Unterminated code point range at end of category data');
}

// Parse the category aliases.
// Only lines terminated by a newline are visited; every name listed after the
// short name of a General_Category ('gc') entry is mapped to that short name.
data = categoryAliasData;
row = 0;
lineEnd = -1;
const IGNORE = /[#\s]/;
for (;;) {
  lineStart = lineEnd + 1;
  lineEnd = data.indexOf('\n', lineStart);
  if (lineEnd < 0) break;
  row += 1;

  // Blank and comment lines start with whitespace or '#'.
  if (IGNORE.test(data[lineStart])) continue;

  // The first two semicolons delimit:
  // * the property value type
  // * the short name
  // The long name and any further aliases follow in later fields.
  const typeStop = data.indexOf(';', lineStart);
  const shortStop = data.indexOf(';', typeStop + 1);
  if (typeStop === -1 || shortStop === -1) {
    throw new Error(`Unexpected format on line ${row}`);
  }

  // Keep General_Category lines only.
  const propertyValueType = data.slice(lineStart, typeStop).trim();
  if (propertyValueType !== 'gc') continue;

  const shortName = data.slice(typeStop + 1, shortStop).trim();

  // Walk the remaining fields. The last one ends at a trailing comment if
  // there is one on this line, otherwise at the end of the line.
  let cursor = shortStop + 1;
  let isLastField = false;
  while (!isLastField) {
    let stop = data.indexOf(';', cursor);
    isLastField = stop === -1 || stop > lineEnd;
    if (isLastField) {
      const commentStart = data.indexOf('#', cursor);
      const hasComment = commentStart !== -1 && commentStart <= lineEnd;
      stop = hasComment ? commentStart : lineEnd;
    }

    const alias = data.slice(cursor, stop).trim();
    console.log("Category alias:", alias, shortName);
    categoryAliases[alias] = shortName;

    cursor = stop + 1;
  }
}

// Parse the property aliases.
// Only lines terminated by a newline are visited. The short name, and every
// name listed after the long name, is mapped to the long name.
data = propertyAliasData;
row = 0;
lineEnd = -1;
for (;;) {
  lineStart = lineEnd + 1;
  lineEnd = data.indexOf('\n', lineStart);
  if (lineEnd < 0) break;
  row += 1;

  // Blank and comment lines start with whitespace or '#'.
  if (IGNORE.test(data[lineStart])) continue;

  // The first semicolon ends the short name; the long name is the next field.
  const shortStop = data.indexOf(';', lineStart);
  if (shortStop === -1) {
    throw new Error(`Unexpected format on line ${row}`);
  }
  const shortName = data.slice(lineStart, shortStop).trim();

  // Walk the remaining fields. The first one is the long name, which is
  // registered under the short name; each later field is a further alias.
  // The last field ends at a trailing comment if there is one on this line,
  // otherwise at the end of the line.
  let longName = null;
  let cursor = shortStop + 1;
  let isLastField = false;
  while (!isLastField) {
    let stop = data.indexOf(';', cursor);
    isLastField = stop === -1 || stop > lineEnd;
    if (isLastField) {
      const commentStart = data.indexOf('#', cursor);
      const hasComment = commentStart !== -1 && commentStart <= lineEnd;
      stop = hasComment ? commentStart : lineEnd;
    }

    const field = data.slice(cursor, stop).trim();
    let alias;
    if (longName === null) {
      longName = field;
      alias = shortName;
    } else {
      alias = field;
    }
    console.log("Property alias:", alias, longName);
    propertyAliases[alias] = longName;

    cursor = stop + 1;
  }
}

// Serialize each table as compact JSON to its destination file, in a fixed order.
const outputs = [
  [CATEGORY_OUTPUT_PATH, categories],
  [PROPERTY_OUTPUT_PATH, properties],
  [CATEGORY_ALIAS_OUTPUT_PATH, categoryAliases],
  [PROPERTY_ALIAS_OUTPUT_PATH, propertyAliases],
];
for (const [outputPath, table] of outputs) {
  fs.writeFileSync(outputPath, JSON.stringify(table), 'utf8');
}