494 lines
15 KiB
C++
494 lines
15 KiB
C++
|
/**************************************************************************/
|
||
|
/* gdscript_tokenizer_buffer.cpp */
|
||
|
/**************************************************************************/
|
||
|
/* This file is part of: */
|
||
|
/* GODOT ENGINE */
|
||
|
/* https://godotengine.org */
|
||
|
/**************************************************************************/
|
||
|
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
|
||
|
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
|
||
|
/* */
|
||
|
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||
|
/* a copy of this software and associated documentation files (the */
|
||
|
/* "Software"), to deal in the Software without restriction, including */
|
||
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||
|
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||
|
/* permit persons to whom the Software is furnished to do so, subject to */
|
||
|
/* the following conditions: */
|
||
|
/* */
|
||
|
/* The above copyright notice and this permission notice shall be */
|
||
|
/* included in all copies or substantial portions of the Software. */
|
||
|
/* */
|
||
|
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||
|
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||
|
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
|
||
|
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||
|
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||
|
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||
|
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||
|
/**************************************************************************/
|
||
|
|
||
|
#include "gdscript_tokenizer_buffer.h"
|
||
|
|
||
|
#include "core/io/compression.h"
|
||
|
#include "core/io/marshalls.h"
|
||
|
|
||
|
#define TOKENIZER_VERSION 100
|
||
|
|
||
|
int GDScriptTokenizerBuffer::_token_to_binary(const Token &p_token, Vector<uint8_t> &r_buffer, int p_start, HashMap<StringName, uint32_t> &r_identifiers_map, HashMap<Variant, uint32_t, VariantHasher, VariantComparator> &r_constants_map) {
|
||
|
int pos = p_start;
|
||
|
|
||
|
int token_type = p_token.type & TOKEN_MASK;
|
||
|
|
||
|
switch (p_token.type) {
|
||
|
case GDScriptTokenizer::Token::ANNOTATION:
|
||
|
case GDScriptTokenizer::Token::IDENTIFIER: {
|
||
|
// Add identifier to map.
|
||
|
int identifier_pos;
|
||
|
StringName id = p_token.get_identifier();
|
||
|
if (r_identifiers_map.has(id)) {
|
||
|
identifier_pos = r_identifiers_map[id];
|
||
|
} else {
|
||
|
identifier_pos = r_identifiers_map.size();
|
||
|
r_identifiers_map[id] = identifier_pos;
|
||
|
}
|
||
|
token_type |= identifier_pos << TOKEN_BITS;
|
||
|
} break;
|
||
|
case GDScriptTokenizer::Token::ERROR:
|
||
|
case GDScriptTokenizer::Token::LITERAL: {
|
||
|
// Add literal to map.
|
||
|
int constant_pos;
|
||
|
if (r_constants_map.has(p_token.literal)) {
|
||
|
constant_pos = r_constants_map[p_token.literal];
|
||
|
} else {
|
||
|
constant_pos = r_constants_map.size();
|
||
|
r_constants_map[p_token.literal] = constant_pos;
|
||
|
}
|
||
|
token_type |= constant_pos << TOKEN_BITS;
|
||
|
} break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// Encode token.
|
||
|
int token_len;
|
||
|
if (token_type & TOKEN_MASK) {
|
||
|
token_len = 8;
|
||
|
r_buffer.resize(pos + token_len);
|
||
|
encode_uint32(token_type | TOKEN_BYTE_MASK, &r_buffer.write[pos]);
|
||
|
pos += 4;
|
||
|
} else {
|
||
|
token_len = 5;
|
||
|
r_buffer.resize(pos + token_len);
|
||
|
r_buffer.write[pos] = token_type;
|
||
|
pos++;
|
||
|
}
|
||
|
encode_uint32(p_token.start_line, &r_buffer.write[pos]);
|
||
|
return token_len;
|
||
|
}
|
||
|
|
||
|
GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t *p_buffer) {
|
||
|
Token token;
|
||
|
const uint8_t *b = p_buffer;
|
||
|
|
||
|
uint32_t token_type = decode_uint32(b);
|
||
|
token.type = (Token::Type)(token_type & TOKEN_MASK);
|
||
|
if (token_type & TOKEN_BYTE_MASK) {
|
||
|
b += 4;
|
||
|
} else {
|
||
|
b++;
|
||
|
}
|
||
|
token.start_line = decode_uint32(b);
|
||
|
token.end_line = token.start_line;
|
||
|
|
||
|
token.literal = token.get_name();
|
||
|
if (token.type == Token::CONST_NAN) {
|
||
|
token.literal = String("NAN"); // Special case since name and notation are different.
|
||
|
}
|
||
|
|
||
|
switch (token.type) {
|
||
|
case GDScriptTokenizer::Token::ANNOTATION:
|
||
|
case GDScriptTokenizer::Token::IDENTIFIER: {
|
||
|
// Get name from map.
|
||
|
int identifier_pos = token_type >> TOKEN_BITS;
|
||
|
if (unlikely(identifier_pos >= identifiers.size())) {
|
||
|
Token error;
|
||
|
error.type = Token::ERROR;
|
||
|
error.literal = "Identifier index out of bounds.";
|
||
|
return error;
|
||
|
}
|
||
|
token.literal = identifiers[identifier_pos];
|
||
|
} break;
|
||
|
case GDScriptTokenizer::Token::ERROR:
|
||
|
case GDScriptTokenizer::Token::LITERAL: {
|
||
|
// Get literal from map.
|
||
|
int constant_pos = token_type >> TOKEN_BITS;
|
||
|
if (unlikely(constant_pos >= constants.size())) {
|
||
|
Token error;
|
||
|
error.type = Token::ERROR;
|
||
|
error.literal = "Constant index out of bounds.";
|
||
|
return error;
|
||
|
}
|
||
|
token.literal = constants[constant_pos];
|
||
|
} break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
|
||
|
const uint8_t *buf = p_buffer.ptr();
|
||
|
ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
|
||
|
|
||
|
int version = decode_uint32(&buf[4]);
|
||
|
ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version.");
|
||
|
|
||
|
int decompressed_size = decode_uint32(&buf[8]);
|
||
|
|
||
|
Vector<uint8_t> contents;
|
||
|
if (decompressed_size == 0) {
|
||
|
contents = p_buffer.slice(12);
|
||
|
} else {
|
||
|
contents.resize(decompressed_size);
|
||
|
int result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);
|
||
|
ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");
|
||
|
}
|
||
|
|
||
|
int total_len = contents.size();
|
||
|
buf = contents.ptr();
|
||
|
uint32_t identifier_count = decode_uint32(&buf[0]);
|
||
|
uint32_t constant_count = decode_uint32(&buf[4]);
|
||
|
uint32_t token_line_count = decode_uint32(&buf[8]);
|
||
|
uint32_t token_count = decode_uint32(&buf[16]);
|
||
|
|
||
|
const uint8_t *b = &buf[20];
|
||
|
total_len -= 20;
|
||
|
|
||
|
identifiers.resize(identifier_count);
|
||
|
for (uint32_t i = 0; i < identifier_count; i++) {
|
||
|
uint32_t len = decode_uint32(b);
|
||
|
total_len -= 4;
|
||
|
ERR_FAIL_COND_V((len * 4u) > (uint32_t)total_len, ERR_INVALID_DATA);
|
||
|
b += 4;
|
||
|
Vector<uint32_t> cs;
|
||
|
cs.resize(len);
|
||
|
for (uint32_t j = 0; j < len; j++) {
|
||
|
uint8_t tmp[4];
|
||
|
for (uint32_t k = 0; k < 4; k++) {
|
||
|
tmp[k] = b[j * 4 + k] ^ 0xb6;
|
||
|
}
|
||
|
cs.write[j] = decode_uint32(tmp);
|
||
|
}
|
||
|
|
||
|
String s(reinterpret_cast<const char32_t *>(cs.ptr()), len);
|
||
|
b += len * 4;
|
||
|
total_len -= len * 4;
|
||
|
identifiers.write[i] = s;
|
||
|
}
|
||
|
|
||
|
constants.resize(constant_count);
|
||
|
for (uint32_t i = 0; i < constant_count; i++) {
|
||
|
Variant v;
|
||
|
int len;
|
||
|
Error err = decode_variant(v, b, total_len, &len, false);
|
||
|
if (err) {
|
||
|
return err;
|
||
|
}
|
||
|
b += len;
|
||
|
total_len -= len;
|
||
|
constants.write[i] = v;
|
||
|
}
|
||
|
|
||
|
for (uint32_t i = 0; i < token_line_count; i++) {
|
||
|
ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);
|
||
|
uint32_t token_index = decode_uint32(b);
|
||
|
b += 4;
|
||
|
uint32_t line = decode_uint32(b);
|
||
|
b += 4;
|
||
|
total_len -= 8;
|
||
|
token_lines[token_index] = line;
|
||
|
}
|
||
|
for (uint32_t i = 0; i < token_line_count; i++) {
|
||
|
ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);
|
||
|
uint32_t token_index = decode_uint32(b);
|
||
|
b += 4;
|
||
|
uint32_t column = decode_uint32(b);
|
||
|
b += 4;
|
||
|
total_len -= 8;
|
||
|
token_columns[token_index] = column;
|
||
|
}
|
||
|
|
||
|
tokens.resize(token_count);
|
||
|
for (uint32_t i = 0; i < token_count; i++) {
|
||
|
int token_len = 5;
|
||
|
if ((*b) & TOKEN_BYTE_MASK) {
|
||
|
token_len = 8;
|
||
|
}
|
||
|
ERR_FAIL_COND_V(total_len < token_len, ERR_INVALID_DATA);
|
||
|
Token token = _binary_to_token(b);
|
||
|
b += token_len;
|
||
|
ERR_FAIL_INDEX_V(token.type, Token::TK_MAX, ERR_INVALID_DATA);
|
||
|
tokens.write[i] = token;
|
||
|
total_len -= token_len;
|
||
|
}
|
||
|
|
||
|
ERR_FAIL_COND_V(total_len > 0, ERR_INVALID_DATA);
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {
|
||
|
HashMap<StringName, uint32_t> identifier_map;
|
||
|
HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;
|
||
|
Vector<uint8_t> token_buffer;
|
||
|
HashMap<uint32_t, uint32_t> token_lines;
|
||
|
HashMap<uint32_t, uint32_t> token_columns;
|
||
|
|
||
|
GDScriptTokenizerText tokenizer;
|
||
|
tokenizer.set_source_code(p_code);
|
||
|
tokenizer.set_multiline_mode(true); // Ignore whitespace tokens.
|
||
|
Token current = tokenizer.scan();
|
||
|
int token_pos = 0;
|
||
|
int last_token_line = 0;
|
||
|
int token_counter = 0;
|
||
|
|
||
|
while (current.type != Token::TK_EOF) {
|
||
|
int token_len = _token_to_binary(current, token_buffer, token_pos, identifier_map, constant_map);
|
||
|
token_pos += token_len;
|
||
|
if (token_counter > 0 && current.start_line > last_token_line) {
|
||
|
token_lines[token_counter] = current.start_line;
|
||
|
token_columns[token_counter] = current.start_column;
|
||
|
}
|
||
|
last_token_line = current.end_line;
|
||
|
|
||
|
current = tokenizer.scan();
|
||
|
token_counter++;
|
||
|
}
|
||
|
|
||
|
// Reverse maps.
|
||
|
Vector<StringName> rev_identifier_map;
|
||
|
rev_identifier_map.resize(identifier_map.size());
|
||
|
for (const KeyValue<StringName, uint32_t> &E : identifier_map) {
|
||
|
rev_identifier_map.write[E.value] = E.key;
|
||
|
}
|
||
|
Vector<Variant> rev_constant_map;
|
||
|
rev_constant_map.resize(constant_map.size());
|
||
|
for (const KeyValue<Variant, uint32_t> &E : constant_map) {
|
||
|
rev_constant_map.write[E.value] = E.key;
|
||
|
}
|
||
|
HashMap<uint32_t, uint32_t> rev_token_lines;
|
||
|
for (const KeyValue<uint32_t, uint32_t> &E : token_lines) {
|
||
|
rev_token_lines[E.value] = E.key;
|
||
|
}
|
||
|
|
||
|
// Remove continuation lines from map.
|
||
|
for (int line : tokenizer.get_continuation_lines()) {
|
||
|
if (rev_token_lines.has(line)) {
|
||
|
token_lines.erase(rev_token_lines[line]);
|
||
|
token_columns.erase(rev_token_lines[line]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Vector<uint8_t> contents;
|
||
|
contents.resize(20);
|
||
|
encode_uint32(identifier_map.size(), &contents.write[0]);
|
||
|
encode_uint32(constant_map.size(), &contents.write[4]);
|
||
|
encode_uint32(token_lines.size(), &contents.write[8]);
|
||
|
encode_uint32(token_counter, &contents.write[16]);
|
||
|
|
||
|
int buf_pos = 20;
|
||
|
|
||
|
// Save identifiers.
|
||
|
for (const StringName &id : rev_identifier_map) {
|
||
|
String s = id.operator String();
|
||
|
int len = s.length();
|
||
|
|
||
|
contents.resize(buf_pos + (len + 1) * 4);
|
||
|
|
||
|
encode_uint32(len, &contents.write[buf_pos]);
|
||
|
buf_pos += 4;
|
||
|
|
||
|
for (int i = 0; i < len; i++) {
|
||
|
uint8_t tmp[4];
|
||
|
encode_uint32(s[i], tmp);
|
||
|
|
||
|
for (int b = 0; b < 4; b++) {
|
||
|
contents.write[buf_pos + b] = tmp[b] ^ 0xb6;
|
||
|
}
|
||
|
|
||
|
buf_pos += 4;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Save constants.
|
||
|
for (const Variant &v : rev_constant_map) {
|
||
|
int len;
|
||
|
// Objects cannot be constant, never encode objects.
|
||
|
Error err = encode_variant(v, nullptr, len, false);
|
||
|
ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
|
||
|
contents.resize(buf_pos + len);
|
||
|
encode_variant(v, &contents.write[buf_pos], len, false);
|
||
|
buf_pos += len;
|
||
|
}
|
||
|
|
||
|
// Save lines and columns.
|
||
|
contents.resize(buf_pos + token_lines.size() * 16);
|
||
|
for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
|
||
|
encode_uint32(e.key, &contents.write[buf_pos]);
|
||
|
buf_pos += 4;
|
||
|
encode_uint32(e.value, &contents.write[buf_pos]);
|
||
|
buf_pos += 4;
|
||
|
}
|
||
|
for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
|
||
|
encode_uint32(e.key, &contents.write[buf_pos]);
|
||
|
buf_pos += 4;
|
||
|
encode_uint32(e.value, &contents.write[buf_pos]);
|
||
|
buf_pos += 4;
|
||
|
}
|
||
|
|
||
|
// Store tokens.
|
||
|
contents.append_array(token_buffer);
|
||
|
|
||
|
Vector<uint8_t> buf;
|
||
|
|
||
|
// Save header.
|
||
|
buf.resize(12);
|
||
|
buf.write[0] = 'G';
|
||
|
buf.write[1] = 'D';
|
||
|
buf.write[2] = 'S';
|
||
|
buf.write[3] = 'C';
|
||
|
encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
|
||
|
|
||
|
switch (p_compress_mode) {
|
||
|
case COMPRESS_NONE:
|
||
|
encode_uint32(0u, &buf.write[8]);
|
||
|
buf.append_array(contents);
|
||
|
break;
|
||
|
|
||
|
case COMPRESS_ZSTD: {
|
||
|
encode_uint32(contents.size(), &buf.write[8]);
|
||
|
Vector<uint8_t> compressed;
|
||
|
int max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);
|
||
|
compressed.resize(max_size);
|
||
|
|
||
|
int compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);
|
||
|
ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");
|
||
|
compressed.resize(compressed_size);
|
||
|
|
||
|
buf.append_array(compressed);
|
||
|
} break;
|
||
|
}
|
||
|
|
||
|
return buf;
|
||
|
}
|
||
|
|
||
|
int GDScriptTokenizerBuffer::get_cursor_line() const {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int GDScriptTokenizerBuffer::get_cursor_column() const {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
void GDScriptTokenizerBuffer::set_cursor_position(int p_line, int p_column) {
|
||
|
}
|
||
|
|
||
|
void GDScriptTokenizerBuffer::set_multiline_mode(bool p_state) {
|
||
|
multiline_mode = p_state;
|
||
|
}
|
||
|
|
||
|
bool GDScriptTokenizerBuffer::is_past_cursor() const {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
void GDScriptTokenizerBuffer::push_expression_indented_block() {
|
||
|
indent_stack_stack.push_back(indent_stack);
|
||
|
}
|
||
|
|
||
|
void GDScriptTokenizerBuffer::pop_expression_indented_block() {
|
||
|
ERR_FAIL_COND(indent_stack_stack.is_empty());
|
||
|
indent_stack = indent_stack_stack.back()->get();
|
||
|
indent_stack_stack.pop_back();
|
||
|
}
|
||
|
|
||
|
GDScriptTokenizer::Token GDScriptTokenizerBuffer::scan() {
|
||
|
// Add final newline.
|
||
|
if (current >= tokens.size() && !last_token_was_newline) {
|
||
|
Token newline;
|
||
|
newline.type = Token::NEWLINE;
|
||
|
newline.start_line = current_line;
|
||
|
newline.end_line = current_line;
|
||
|
last_token_was_newline = true;
|
||
|
return newline;
|
||
|
}
|
||
|
|
||
|
// Resolve pending indentation change.
|
||
|
if (pending_indents > 0) {
|
||
|
pending_indents--;
|
||
|
Token indent;
|
||
|
indent.type = Token::INDENT;
|
||
|
indent.start_line = current_line;
|
||
|
indent.end_line = current_line;
|
||
|
return indent;
|
||
|
} else if (pending_indents < 0) {
|
||
|
pending_indents++;
|
||
|
Token dedent;
|
||
|
dedent.type = Token::DEDENT;
|
||
|
dedent.start_line = current_line;
|
||
|
dedent.end_line = current_line;
|
||
|
return dedent;
|
||
|
}
|
||
|
|
||
|
if (current >= tokens.size()) {
|
||
|
if (!indent_stack.is_empty()) {
|
||
|
pending_indents -= indent_stack.size();
|
||
|
indent_stack.clear();
|
||
|
return scan();
|
||
|
}
|
||
|
Token eof;
|
||
|
eof.type = Token::TK_EOF;
|
||
|
return eof;
|
||
|
};
|
||
|
|
||
|
if (!last_token_was_newline && token_lines.has(current)) {
|
||
|
current_line = token_lines[current];
|
||
|
uint32_t current_column = token_columns[current];
|
||
|
|
||
|
// Check if there's a need to indent/dedent.
|
||
|
if (!multiline_mode) {
|
||
|
uint32_t previous_indent = 0;
|
||
|
if (!indent_stack.is_empty()) {
|
||
|
previous_indent = indent_stack.back()->get();
|
||
|
}
|
||
|
if (current_column - 1 > previous_indent) {
|
||
|
pending_indents++;
|
||
|
indent_stack.push_back(current_column - 1);
|
||
|
} else {
|
||
|
while (current_column - 1 < previous_indent) {
|
||
|
pending_indents--;
|
||
|
indent_stack.pop_back();
|
||
|
if (indent_stack.is_empty()) {
|
||
|
break;
|
||
|
}
|
||
|
previous_indent = indent_stack.back()->get();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Token newline;
|
||
|
newline.type = Token::NEWLINE;
|
||
|
newline.start_line = current_line;
|
||
|
newline.end_line = current_line;
|
||
|
last_token_was_newline = true;
|
||
|
|
||
|
return newline;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
last_token_was_newline = false;
|
||
|
|
||
|
Token token = tokens[current++];
|
||
|
return token;
|
||
|
}
|